diff --git a/checkpoint-0/config.json b/checkpoint-0/config.json new file mode 100755 index 0000000000000000000000000000000000000000..6e5b12aa45eb065a1dd1740d982095751bde6f0c --- /dev/null +++ b/checkpoint-0/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 60000 +} diff --git a/checkpoint-0/pytorch_model.bin b/checkpoint-0/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..99d653079744c482d24b3bb42d093004cbaed387 --- /dev/null +++ b/checkpoint-0/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4c95839b2badd36d756ee7d82ab6270d8f29b298d076567c36c4da227dfe352 +size 540327017 diff --git a/checkpoint-0/special_tokens_map.json b/checkpoint-0/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-0/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-0/tokenizer_config.json b/checkpoint-0/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-0/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-0/training_args.bin b/checkpoint-0/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..938bd31b67d4eb91525b5f5b946519126a3331c7 --- /dev/null +++ b/checkpoint-0/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5ee64ec673c5915c725d59ab09d5567c896e983bbaa61ee563d03df6350d2f +size 3183 diff --git a/checkpoint-100/config.json b/checkpoint-100/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6e5b12aa45eb065a1dd1740d982095751bde6f0c --- /dev/null +++ b/checkpoint-100/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + 
"transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 60000 +} diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc9705fc6ea62ebc3e48fd3171d6e9dabcb538d9 --- /dev/null +++ b/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc739429c528fd605ae4ced7759636a9f803147f16903ea462bda910a4c05901 +size 1055464817 diff --git a/checkpoint-100/pytorch_model.bin b/checkpoint-100/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..79b619a49fbe3fb008620e68e44a094a48bae82b --- /dev/null +++ b/checkpoint-100/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31aab2a1f697209424858a3dae8d5f27b1bf74ea42f8e9d7ce4f7ce029ba6c1b +size 540327017 diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..4a711bf9142ceba18c16a29ca58f5c541e97d5f8 --- /dev/null +++ b/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c88c7f81683e9c433c27ca5798046c1a73abcb05f9124581b35a060a57da7ddb +size 14567 diff --git a/checkpoint-100/scaler.pt b/checkpoint-100/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..45cc4a33e17645cb0ed4a911b11c77cb2e7ce7f3 --- /dev/null +++ b/checkpoint-100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13a3423b2fe42f204bc8fe2c666ff379f9fd753a0f13613064a5e71e86b519e8 +size 559 diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..595eec9094e91b3eb24c2de88c461df2c22026ab --- /dev/null +++ b/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e9b9d31d11c624d89b0c04ad496adf4b5addd3e703848d2583972c703e8da6 +size 623 diff --git a/checkpoint-100/special_tokens_map.json b/checkpoint-100/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-100/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-100/tokenizer_config.json b/checkpoint-100/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-100/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f25e34366f0fa5f68b3fda7b4deba073edb99469 --- /dev/null +++ b/checkpoint-100/trainer_state.json @@ -0,0 +1,34 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.013333333333333, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0688, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.3635, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.4596, + "step": 100 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.7083964284928e+16, + "trial_name": null, + "trial_params": null +} 
diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..938bd31b67d4eb91525b5f5b946519126a3331c7 --- /dev/null +++ b/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5ee64ec673c5915c725d59ab09d5567c896e983bbaa61ee563d03df6350d2f +size 3183 diff --git a/checkpoint-1000/config.json b/checkpoint-1000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6e5b12aa45eb065a1dd1740d982095751bde6f0c --- /dev/null +++ b/checkpoint-1000/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 60000 +} diff --git a/checkpoint-1000/optimizer.pt b/checkpoint-1000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a3bf3823abc761e061df3460070eb664648d3efc --- /dev/null +++ b/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7e4d3ae8b97ce671dd779cb46001aeabf21d652f1788c423ae838cd616cd90c +size 1055465009 diff --git a/checkpoint-1000/pytorch_model.bin b/checkpoint-1000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8b217acdf4c1ed19b45d1e867859a2e2d6ab2c9 --- /dev/null +++ b/checkpoint-1000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6a9d5ef39e00bb16f938f37fa013561bf784cfc9470dd8e2998eaa4f075fdd4 +size 540327017 diff --git a/checkpoint-1000/rng_state.pth b/checkpoint-1000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..eee166f72ba5c6a198d80f1f0ae739120d6c85e0 --- /dev/null +++ b/checkpoint-1000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f621d01abc43f169114be8707d674141dc4877a4ce21b6eaa66ebbe555399b1 +size 14567 diff --git a/checkpoint-1000/scaler.pt b/checkpoint-1000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b04695d3a30e4bab2b78883d9c849c25c37ef7d7 --- /dev/null +++ b/checkpoint-1000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f810fc7b695697c440d8985f6042b4ba23a9e1027604c265718b518ca29f1b2b +size 559 diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a616899591a36b66ac0bd1ceeb324087c181a2a --- /dev/null +++ b/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0691206f4bd9ca409d6e7104087a4e0eb05df8f8f555a400f6ecc532edba52d8 +size 623 diff --git a/checkpoint-1000/special_tokens_map.json b/checkpoint-1000/special_tokens_map.json new file mode 100644 index 
0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1000/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1000/tokenizer_config.json b/checkpoint-1000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1000/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a73f3de1da0032ba9fbd59d2df722f54aae8451e --- /dev/null +++ b/checkpoint-1000/trainer_state.json @@ -0,0 +1,158 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 35.016666666666666, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0688, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.3635, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.4596, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 5.8337, + "step": 150 + }, + { + "epoch": 7.0, + "learning_rate": 0.0005555555555555556, + "loss": 5.4472, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 5.1974, + "step": 250 + }, + { + "epoch": 10.02, + "learning_rate": 0.0005, + "loss": 5.1091, + "step": 300 + }, + { + "epoch": 12.01, + "learning_rate": 0.00047222222222222224, + "loss": 4.9724, + "step": 350 + }, + { + "epoch": 14.01, + "learning_rate": 0.00044444444444444436, + "loss": 4.8054, + "step": 400 + }, + { + "epoch": 16.0, + "learning_rate": 0.00041666666666666664, + "loss": 4.5929, + "step": 450 + }, + { + "epoch": 17.02, + "learning_rate": 0.00038888888888888887, + "loss": 4.3775, + "step": 500 + }, + { + "epoch": 19.02, + "learning_rate": 0.0003611111111111111, + "loss": 4.2825, + "step": 550 + }, + { + "epoch": 21.01, + "learning_rate": 0.0003333333333333333, + "loss": 4.1618, + "step": 600 + }, + { + "epoch": 23.0, + "learning_rate": 0.00030555555555555555, + "loss": 4.0581, + "step": 650 + }, + { + "epoch": 24.02, + "learning_rate": 0.0002777777777777778, + "loss": 3.9257, + "step": 700 + }, + { + "epoch": 26.02, + "learning_rate": 0.00025, + "loss": 3.8778, + "step": 750 + }, + { + "epoch": 28.01, + "learning_rate": 0.00022222222222222218, + "loss": 3.8028, + "step": 800 + }, + { + "epoch": 30.01, + "learning_rate": 0.00019444444444444443, + "loss": 3.7326, + "step": 850 + }, + { + "epoch": 32.0, + "learning_rate": 0.00016666666666666666, + "loss": 3.6708, + "step": 900 + }, + { + "epoch": 33.02, + "learning_rate": 0.0001388888888888889, + "loss": 3.5837, + "step": 950 + }, + { + "epoch": 35.02, + "learning_rate": 0.00011111111111111109, + "loss": 3.5664, + "step": 1000 + }, + { + "epoch": 35.02, + "eval_loss": 4.406866073608398, + "eval_runtime": 1.7783, + "eval_samples_per_second": 52.296, + "eval_steps_per_second": 3.374, + "step": 1000 + }, + { + "epoch": 35.02, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_loss": 4.406866073608398, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_ppl": 82.01204064414016, + 
"eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_runtime": 1.7783, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_samples_per_second": 52.296, + "step": 1000 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.7138574319616e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..938bd31b67d4eb91525b5f5b946519126a3331c7 --- /dev/null +++ b/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5ee64ec673c5915c725d59ab09d5567c896e983bbaa61ee563d03df6350d2f +size 3183 diff --git a/checkpoint-1100/config.json b/checkpoint-1100/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6e5b12aa45eb065a1dd1740d982095751bde6f0c --- /dev/null +++ b/checkpoint-1100/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 60000 +} diff --git a/checkpoint-1100/optimizer.pt b/checkpoint-1100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8871d358ff0faab81f7fbe9217f9a9b09a2be7e5 --- /dev/null +++ b/checkpoint-1100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c4496933175c764e1f1f926aa2c37770d2fa8171c23e72b872e3219067c8241 +size 1055465009 diff --git a/checkpoint-1100/pytorch_model.bin b/checkpoint-1100/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..5d0e825dd4ee0e79ee646c49f360cfb6265c1633 --- /dev/null +++ b/checkpoint-1100/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db99190cd2c58fe85d05733a780dc0910516d5d914a65acea83894370e0f2570 +size 540327017 diff --git a/checkpoint-1100/rng_state.pth b/checkpoint-1100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..706bb06ed7e322556335afa5e8472c37f65f7dea --- /dev/null +++ b/checkpoint-1100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:628aea9ba8b273e3febbd2bdddc8bc1372302c6f70d3aa4baeef5350db491caa +size 14567 diff --git a/checkpoint-1100/scaler.pt b/checkpoint-1100/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..37bf049fbd5fd721203bf0238edc8ff67dbd8f94 --- /dev/null +++ b/checkpoint-1100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fb16c30b686aa43e110b0d33f9d46bf3127b7124542ca8dc34831233d4675a0 +size 559 diff --git a/checkpoint-1100/scheduler.pt b/checkpoint-1100/scheduler.pt new file mode 100644 index 
0000000000000000000000000000000000000000..c158169ec0bb09620952d544fd4b4edea0cc9cf4 --- /dev/null +++ b/checkpoint-1100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f1b7713e4bb40428f29080b7d08d4a52f779ac863737861e4724292b2cf6c59 +size 623 diff --git a/checkpoint-1100/special_tokens_map.json b/checkpoint-1100/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1100/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1100/tokenizer_config.json b/checkpoint-1100/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1100/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1100/trainer_state.json b/checkpoint-1100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4e444dc5a2eafd1b28db41fccc524625e9888514 --- /dev/null +++ b/checkpoint-1100/trainer_state.json @@ -0,0 +1,170 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 39.00666666666667, + "global_step": 1100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0688, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.3635, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.4596, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 5.8337, + "step": 150 + }, + { + "epoch": 7.0, + "learning_rate": 0.0005555555555555556, + "loss": 5.4472, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 5.1974, + "step": 250 + }, + { + "epoch": 10.02, + "learning_rate": 0.0005, + "loss": 5.1091, + "step": 300 + }, + { + "epoch": 12.01, + "learning_rate": 0.00047222222222222224, + "loss": 4.9724, + "step": 350 + }, + { + "epoch": 14.01, + "learning_rate": 0.00044444444444444436, + "loss": 4.8054, + "step": 400 + }, + { + "epoch": 16.0, + "learning_rate": 0.00041666666666666664, + "loss": 4.5929, + "step": 450 + }, + { + "epoch": 17.02, + "learning_rate": 0.00038888888888888887, + "loss": 4.3775, + "step": 500 + }, + { + "epoch": 19.02, + "learning_rate": 0.0003611111111111111, + "loss": 4.2825, + "step": 550 + }, + { + "epoch": 21.01, + "learning_rate": 0.0003333333333333333, + "loss": 4.1618, + "step": 600 + }, + { + "epoch": 23.0, + "learning_rate": 0.00030555555555555555, + "loss": 4.0581, + "step": 650 + }, + { + "epoch": 24.02, + "learning_rate": 0.0002777777777777778, + "loss": 3.9257, + "step": 700 + }, + { + "epoch": 26.02, + "learning_rate": 0.00025, + "loss": 3.8778, + "step": 750 + }, + { + "epoch": 28.01, + "learning_rate": 0.00022222222222222218, + "loss": 3.8028, + "step": 800 + }, + { + "epoch": 30.01, + "learning_rate": 0.00019444444444444443, + "loss": 3.7326, + "step": 850 + }, + { + "epoch": 32.0, + "learning_rate": 0.00016666666666666666, + "loss": 3.6708, + "step": 900 + }, + { + "epoch": 33.02, + "learning_rate": 0.0001388888888888889, + "loss": 3.5837, + "step": 950 + }, + { + "epoch": 35.02, + "learning_rate": 0.00011111111111111109, + "loss": 3.5664, + "step": 1000 + }, + { + "epoch": 35.02, + "eval_loss": 4.406866073608398, + 
"eval_runtime": 1.7783, + "eval_samples_per_second": 52.296, + "eval_steps_per_second": 3.374, + "step": 1000 + }, + { + "epoch": 35.02, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_loss": 4.406866073608398, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_ppl": 82.01204064414016, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_runtime": 1.7783, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_samples_per_second": 52.296, + "step": 1000 + }, + { + "epoch": 37.01, + "learning_rate": 8.333333333333333e-05, + "loss": 3.5206, + "step": 1050 + }, + { + "epoch": 39.01, + "learning_rate": 5.5555555555555545e-05, + "loss": 3.4829, + "step": 1100 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.98578927550464e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1100/training_args.bin b/checkpoint-1100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..938bd31b67d4eb91525b5f5b946519126a3331c7 --- /dev/null +++ b/checkpoint-1100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5ee64ec673c5915c725d59ab09d5567c896e983bbaa61ee563d03df6350d2f +size 3183 diff --git a/checkpoint-1200/config.json b/checkpoint-1200/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6e5b12aa45eb065a1dd1740d982095751bde6f0c --- /dev/null +++ b/checkpoint-1200/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 60000 +} diff --git a/checkpoint-1200/optimizer.pt b/checkpoint-1200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..678d2d146d83816f1b163f006bfe995f944b2295 --- /dev/null +++ b/checkpoint-1200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16cfb7f789608e9b07e387c4b768348fcb4708cf397a2d738c866d82b83557d2 +size 1055465009 diff --git a/checkpoint-1200/pytorch_model.bin b/checkpoint-1200/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..9a4f7619a791f31eca86b197752db03b0e3e9356 --- /dev/null +++ b/checkpoint-1200/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f345259117a274fb2b1f9c61c5d386fa21d63ba4029129bae3eb685e538f3c8 +size 540327017 diff --git a/checkpoint-1200/rng_state.pth b/checkpoint-1200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5660c5c7fab156d36cdaf83c8980b8debb4d1157 --- /dev/null +++ b/checkpoint-1200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:8617129b0ccf2ca19e176281c8972e0d5a2f73be01748074d3419442b844194e +size 14567 diff --git a/checkpoint-1200/scaler.pt b/checkpoint-1200/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8953dddccbefc4703c09dcda27d83c15add2bade --- /dev/null +++ b/checkpoint-1200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19c7277eaca0850ae3e9b6790b3d002d820169cce0671185e672c28c8ae8e056 +size 559 diff --git a/checkpoint-1200/scheduler.pt b/checkpoint-1200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..310d39c17fc616a9c83286ed00f0f4cefba9f5df --- /dev/null +++ b/checkpoint-1200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:935a8fb09a6e9698d9894853b05e181b3f56098deaaecddde08e55f06bf000c4 +size 623 diff --git a/checkpoint-1200/special_tokens_map.json b/checkpoint-1200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1200/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1200/tokenizer_config.json b/checkpoint-1200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1200/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1200/trainer_state.json b/checkpoint-1200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ba59c8b3979060efca2fa019011339ee197f3bff --- /dev/null +++ b/checkpoint-1200/trainer_state.json @@ -0,0 +1,182 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 42.02, + "global_step": 1200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0688, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.3635, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.4596, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 5.8337, + "step": 150 + }, + { + "epoch": 7.0, + "learning_rate": 0.0005555555555555556, + "loss": 5.4472, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 5.1974, + "step": 250 + }, + { + "epoch": 10.02, + "learning_rate": 0.0005, + "loss": 5.1091, + "step": 300 + }, + { + "epoch": 12.01, + "learning_rate": 0.00047222222222222224, + "loss": 4.9724, + "step": 350 + }, + { + "epoch": 14.01, + "learning_rate": 0.00044444444444444436, + "loss": 4.8054, + "step": 400 + }, + { + "epoch": 16.0, + "learning_rate": 0.00041666666666666664, + "loss": 4.5929, + "step": 450 + }, + { + "epoch": 17.02, + "learning_rate": 0.00038888888888888887, + "loss": 4.3775, + "step": 500 + }, + { + "epoch": 19.02, + "learning_rate": 0.0003611111111111111, + "loss": 4.2825, + "step": 550 + }, + { + "epoch": 21.01, + "learning_rate": 0.0003333333333333333, + "loss": 4.1618, + "step": 600 + }, + { + "epoch": 23.0, + "learning_rate": 0.00030555555555555555, + "loss": 4.0581, + "step": 650 + }, + { + "epoch": 24.02, + "learning_rate": 0.0002777777777777778, + "loss": 3.9257, + "step": 700 + }, + { + "epoch": 26.02, + "learning_rate": 0.00025, + "loss": 3.8778, + "step": 750 + }, + { + "epoch": 28.01, + 
"learning_rate": 0.00022222222222222218, + "loss": 3.8028, + "step": 800 + }, + { + "epoch": 30.01, + "learning_rate": 0.00019444444444444443, + "loss": 3.7326, + "step": 850 + }, + { + "epoch": 32.0, + "learning_rate": 0.00016666666666666666, + "loss": 3.6708, + "step": 900 + }, + { + "epoch": 33.02, + "learning_rate": 0.0001388888888888889, + "loss": 3.5837, + "step": 950 + }, + { + "epoch": 35.02, + "learning_rate": 0.00011111111111111109, + "loss": 3.5664, + "step": 1000 + }, + { + "epoch": 35.02, + "eval_loss": 4.406866073608398, + "eval_runtime": 1.7783, + "eval_samples_per_second": 52.296, + "eval_steps_per_second": 3.374, + "step": 1000 + }, + { + "epoch": 35.02, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_loss": 4.406866073608398, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_ppl": 82.01204064414016, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_runtime": 1.7783, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_samples_per_second": 52.296, + "step": 1000 + }, + { + "epoch": 37.01, + "learning_rate": 8.333333333333333e-05, + "loss": 3.5206, + "step": 1050 + }, + { + "epoch": 39.01, + "learning_rate": 5.5555555555555545e-05, + "loss": 3.4829, + "step": 1100 + }, + { + "epoch": 41.0, + "learning_rate": 2.7777777777777772e-05, + "loss": 3.4513, + "step": 1150 + }, + { + "epoch": 42.02, + "learning_rate": 0.0, + "loss": 3.3996, + "step": 1200 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 3.25662891835392e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1200/training_args.bin b/checkpoint-1200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..938bd31b67d4eb91525b5f5b946519126a3331c7 --- /dev/null +++ b/checkpoint-1200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5ee64ec673c5915c725d59ab09d5567c896e983bbaa61ee563d03df6350d2f +size 3183 diff --git a/checkpoint-200/config.json b/checkpoint-200/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6e5b12aa45eb065a1dd1740d982095751bde6f0c --- /dev/null +++ b/checkpoint-200/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 60000 +} diff --git a/checkpoint-200/optimizer.pt b/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..aa0249312c16cb8c3ad9a09f90fb4b8b323cacd6 --- /dev/null +++ b/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fbf09b8a2712ed4affe6a2e69715d1ee0f49b9502e54032b54f542e3d12dd98 
+size 1055464817 diff --git a/checkpoint-200/pytorch_model.bin b/checkpoint-200/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..bb0df8e59ecf4875bea3334baac90355d306192c --- /dev/null +++ b/checkpoint-200/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:302ce4ef0d5f3018856bbcfe6c6960e5bf2c9bef96b0a34a397b7e71ee43cd5c +size 540327017 diff --git a/checkpoint-200/rng_state.pth b/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ca83d6bb179c31e7eb72604ecda5933764366022 --- /dev/null +++ b/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91e59131c4c4214c2f5fc6a81647012c0e1c29496bc8df9f012380a1e65973a9 +size 14567 diff --git a/checkpoint-200/scaler.pt b/checkpoint-200/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..365b52ebf376498237a843f6d7332e5a49b14902 --- /dev/null +++ b/checkpoint-200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb6982c29cd162f49aeb531674acf574eccd46a8f556bec596040d7c3b95200a +size 559 diff --git a/checkpoint-200/scheduler.pt b/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..36cb8da739c80a63971a62f06f781c40ac0fceb2 --- /dev/null +++ b/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a992625ada6d884e508ff9392d16738b4a4163147f8fcbf9f46be82ecae9888 +size 623 diff --git a/checkpoint-200/special_tokens_map.json b/checkpoint-200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-200/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-200/tokenizer_config.json b/checkpoint-200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-200/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-200/trainer_state.json b/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1e81606e56e8a5757909ab9bcde51a3f965d46d9 --- /dev/null +++ b/checkpoint-200/trainer_state.json @@ -0,0 +1,46 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.003333333333333, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0688, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.3635, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.4596, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 5.8337, + "step": 150 + }, + { + "epoch": 7.0, + "learning_rate": 0.0005555555555555556, + "loss": 5.4472, + "step": 200 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 5.4277148639232e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-200/training_args.bin b/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..938bd31b67d4eb91525b5f5b946519126a3331c7 --- /dev/null +++ b/checkpoint-200/training_args.bin @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5ee64ec673c5915c725d59ab09d5567c896e983bbaa61ee563d03df6350d2f +size 3183 diff --git a/checkpoint-300/config.json b/checkpoint-300/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6e5b12aa45eb065a1dd1740d982095751bde6f0c --- /dev/null +++ b/checkpoint-300/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 60000 +} diff --git a/checkpoint-300/optimizer.pt b/checkpoint-300/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1d03b6226e30576030370c37d7e4058bda2de244 --- /dev/null +++ b/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40055e01ccc1b9d3dab80a07db54a569aa6320e25d044049c293cffa411cbc31 +size 1055465009 diff --git a/checkpoint-300/pytorch_model.bin b/checkpoint-300/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..784720ae4614ed420b767a75f791d8f776f88714 --- /dev/null +++ b/checkpoint-300/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef69bfff026123c6bc33c914ce92ce55104037661d4b43f94e0bc79cf710f6ea +size 540327017 diff --git a/checkpoint-300/rng_state.pth b/checkpoint-300/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a3bd4436c98764ff172404de36832328d5d3a5da --- /dev/null +++ b/checkpoint-300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03b23bf2673105a78bda183965f6bbfc135d8c11e253c1fb30b2ed25ad1b8581 +size 14567 diff --git a/checkpoint-300/scaler.pt b/checkpoint-300/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..01066cf4761ea9d2f7962f5181762f7b08690b79 --- /dev/null +++ b/checkpoint-300/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0967b9f865f16344c55f5ccc3cf7d6e8e97ca61dda304e931ca6bad130f48dd1 +size 559 diff --git a/checkpoint-300/scheduler.pt b/checkpoint-300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..681987f21e79cd4685aac32a5e6b74341e25d936 --- /dev/null +++ b/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1db899b266916f792a0898ceb27a87eaf76647f10c29cc0c13ce22f12a12efd +size 623 diff --git a/checkpoint-300/special_tokens_map.json b/checkpoint-300/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-300/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-300/tokenizer_config.json b/checkpoint-300/tokenizer_config.json new 
file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-300/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-300/trainer_state.json b/checkpoint-300/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6d04ffc22c0140e74af5b92555fc174ff9da06c8 --- /dev/null +++ b/checkpoint-300/trainer_state.json @@ -0,0 +1,58 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 10.016666666666667, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0688, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.3635, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.4596, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 5.8337, + "step": 150 + }, + { + "epoch": 7.0, + "learning_rate": 0.0005555555555555556, + "loss": 5.4472, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 5.1974, + "step": 250 + }, + { + "epoch": 10.02, + "learning_rate": 0.0005, + "loss": 5.1091, + "step": 300 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 8.136111292416e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-300/training_args.bin b/checkpoint-300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..938bd31b67d4eb91525b5f5b946519126a3331c7 --- /dev/null +++ b/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5ee64ec673c5915c725d59ab09d5567c896e983bbaa61ee563d03df6350d2f +size 3183 diff --git a/checkpoint-400/config.json b/checkpoint-400/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6e5b12aa45eb065a1dd1740d982095751bde6f0c --- /dev/null +++ b/checkpoint-400/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 60000 +} diff --git a/checkpoint-400/optimizer.pt b/checkpoint-400/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e3aea0a33817dcf37e478c5a7b8ffe74dbba3f79 --- /dev/null +++ b/checkpoint-400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d5d854ba55a7e69aee96bba00f36f65daca5417ce03e1dde6077417c83b66fd +size 1055465009 diff --git a/checkpoint-400/pytorch_model.bin b/checkpoint-400/pytorch_model.bin new file mode 100644 index 
0000000000000000000000000000000000000000..32e535f62c3bf34ea1f64b01e217afb9bb727544 --- /dev/null +++ b/checkpoint-400/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6528315f8ecb970ea80e69fd31a3fbc5ba574ea897879ab6bd3a071e0b767ed +size 540327017 diff --git a/checkpoint-400/rng_state.pth b/checkpoint-400/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..916a139cc04c0929294bf5b920749b3c360a6a5f --- /dev/null +++ b/checkpoint-400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01f998aa2b69975e530768f9b2f1a2cc253b960470647e495c183a52678a0113 +size 14567 diff --git a/checkpoint-400/scaler.pt b/checkpoint-400/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9c7aef4199e98d81152810b661dbaffc01963383 --- /dev/null +++ b/checkpoint-400/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:476e510c8ea7edbd2b51d1e76a4e037820a5639381c0d8b5d32dafa492795a1e +size 559 diff --git a/checkpoint-400/scheduler.pt b/checkpoint-400/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..967673cfc91836d239beb5a7ede06992232ba309 --- /dev/null +++ b/checkpoint-400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db087d678047b5c346bbb8511936612c1fdf223c6fd70321e97369bc31ed76a8 +size 623 diff --git a/checkpoint-400/special_tokens_map.json b/checkpoint-400/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-400/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-400/tokenizer_config.json b/checkpoint-400/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-400/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-400/trainer_state.json b/checkpoint-400/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..09d98e464e65e81273409feb9aad0e1ddad17da3 --- /dev/null +++ b/checkpoint-400/trainer_state.json @@ -0,0 +1,70 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 14.006666666666666, + "global_step": 400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0688, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.3635, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.4596, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 5.8337, + "step": 150 + }, + { + "epoch": 7.0, + "learning_rate": 0.0005555555555555556, + "loss": 5.4472, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 5.1974, + "step": 250 + }, + { + "epoch": 10.02, + "learning_rate": 0.0005, + "loss": 5.1091, + "step": 300 + }, + { + "epoch": 12.01, + "learning_rate": 0.00047222222222222224, + "loss": 4.9724, + "step": 350 + }, + { + "epoch": 14.01, + "learning_rate": 0.00044444444444444436, + "loss": 4.8054, + "step": 400 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.08554297278464e+17, + "trial_name": null, + 
"trial_params": null +} diff --git a/checkpoint-400/training_args.bin b/checkpoint-400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..938bd31b67d4eb91525b5f5b946519126a3331c7 --- /dev/null +++ b/checkpoint-400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5ee64ec673c5915c725d59ab09d5567c896e983bbaa61ee563d03df6350d2f +size 3183 diff --git a/checkpoint-500/config.json b/checkpoint-500/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6e5b12aa45eb065a1dd1740d982095751bde6f0c --- /dev/null +++ b/checkpoint-500/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 60000 +} diff --git a/checkpoint-500/optimizer.pt b/checkpoint-500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b8acc722f75f98f03961858b412bd9a807b5b44e --- /dev/null +++ b/checkpoint-500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5786aab2de2fa2b7fa5fdf707fdaaa3d39301c292280225ff51140261b53309e +size 1055465009 diff --git a/checkpoint-500/pytorch_model.bin b/checkpoint-500/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..94bae928456200c9516facaf43147ef893646da2 --- /dev/null +++ b/checkpoint-500/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f219764579ebc9ab9f52c8be0d0de0f3e836b9a77d6c204eb51fcff23c14869d +size 540327017 diff --git a/checkpoint-500/rng_state.pth b/checkpoint-500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..35aa4d6ab3069cf0925cbe1877ee65b6d951acc4 --- /dev/null +++ b/checkpoint-500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01b9965ac82750d42390fc99e458715e9b52b2d4b61c08939016f01c273547a6 +size 14567 diff --git a/checkpoint-500/scaler.pt b/checkpoint-500/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e8b96c9c2837f2c95b1d07d4fc3f245f9ad1ef62 --- /dev/null +++ b/checkpoint-500/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fa4c7be44c959599b8b43bb9bc3371e9e4e5bbc5758b3ab5afcccfda3e72e67 +size 559 diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..77b958181a5b47b022b96b38a6207f274a1b6604 --- /dev/null +++ b/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:026fae4a90d56c24de94b10dfa7a75b6ba4e43bd5c1a3fdb2d77356b81cd6f8a +size 623 diff --git a/checkpoint-500/special_tokens_map.json b/checkpoint-500/special_tokens_map.json new file mode 100644 index 
0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-500/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-500/tokenizer_config.json b/checkpoint-500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-500/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d1a9f48b5540e499a133850d20639cbe51c14a77 --- /dev/null +++ b/checkpoint-500/trainer_state.json @@ -0,0 +1,82 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 17.02, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0688, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.3635, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.4596, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 5.8337, + "step": 150 + }, + { + "epoch": 7.0, + "learning_rate": 0.0005555555555555556, + "loss": 5.4472, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 5.1974, + "step": 250 + }, + { + "epoch": 10.02, + "learning_rate": 0.0005, + "loss": 5.1091, + "step": 300 + }, + { + "epoch": 12.01, + "learning_rate": 0.00047222222222222224, + "loss": 4.9724, + "step": 350 + }, + { + "epoch": 14.01, + "learning_rate": 0.00044444444444444436, + "loss": 4.8054, + "step": 400 + }, + { + "epoch": 16.0, + "learning_rate": 0.00041666666666666664, + "loss": 4.5929, + "step": 450 + }, + { + "epoch": 17.02, + "learning_rate": 0.00038888888888888887, + "loss": 4.3775, + "step": 500 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.35638261563392e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..938bd31b67d4eb91525b5f5b946519126a3331c7 --- /dev/null +++ b/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5ee64ec673c5915c725d59ab09d5567c896e983bbaa61ee563d03df6350d2f +size 3183 diff --git a/checkpoint-600/config.json b/checkpoint-600/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6e5b12aa45eb065a1dd1740d982095751bde6f0c --- /dev/null +++ b/checkpoint-600/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + 
"text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 60000 +} diff --git a/checkpoint-600/optimizer.pt b/checkpoint-600/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4e81b3aadc9d4861d42eeb9cb99726e01c0ae1c1 --- /dev/null +++ b/checkpoint-600/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:131ba74a1decc0a786f985affcbcd814ea6383a09fe1a39331b13012d13b39f8 +size 1055465009 diff --git a/checkpoint-600/pytorch_model.bin b/checkpoint-600/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..2fc5e74cf8b053ceccde2ff5968771f2db9ba0a0 --- /dev/null +++ b/checkpoint-600/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b6f7a87d48c63b594bbd717becb989906e2ad44f9bfab043c70c3c536b4fa65 +size 540327017 diff --git a/checkpoint-600/rng_state.pth b/checkpoint-600/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d8ccdc4cc1a41baaace44e98e54ec195a3fe05a3 --- /dev/null +++ b/checkpoint-600/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3efa35c407a3249d4c62d40ff348e0425e8c21ef1976921e420b9989501af4b1 +size 14567 diff --git a/checkpoint-600/scaler.pt b/checkpoint-600/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f95b3e36da01561ec333a83ee8419ad225633e06 --- /dev/null +++ b/checkpoint-600/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e8415b86bbce347c0df306b84a695add049c2a3b2d0b6f4dda3bf036d341150 +size 559 diff --git a/checkpoint-600/scheduler.pt b/checkpoint-600/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..62e5359badd619e7dad2c51d98fa8043d9948f0b --- /dev/null +++ b/checkpoint-600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:700940432b1c2117248896e2ce5a58d93c051d92ea97707f74d76bf1ef24deee +size 623 diff --git a/checkpoint-600/special_tokens_map.json b/checkpoint-600/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-600/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-600/tokenizer_config.json b/checkpoint-600/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-600/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-600/trainer_state.json b/checkpoint-600/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9aafdfe6fa2225055b0babe785d7a644846274a9 --- /dev/null +++ b/checkpoint-600/trainer_state.json @@ -0,0 +1,94 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 21.01, + "global_step": 600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0688, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.3635, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.4596, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + 
"loss": 5.8337, + "step": 150 + }, + { + "epoch": 7.0, + "learning_rate": 0.0005555555555555556, + "loss": 5.4472, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 5.1974, + "step": 250 + }, + { + "epoch": 10.02, + "learning_rate": 0.0005, + "loss": 5.1091, + "step": 300 + }, + { + "epoch": 12.01, + "learning_rate": 0.00047222222222222224, + "loss": 4.9724, + "step": 350 + }, + { + "epoch": 14.01, + "learning_rate": 0.00044444444444444436, + "loss": 4.8054, + "step": 400 + }, + { + "epoch": 16.0, + "learning_rate": 0.00041666666666666664, + "loss": 4.5929, + "step": 450 + }, + { + "epoch": 17.02, + "learning_rate": 0.00038888888888888887, + "loss": 4.3775, + "step": 500 + }, + { + "epoch": 19.02, + "learning_rate": 0.0003611111111111111, + "loss": 4.2825, + "step": 550 + }, + { + "epoch": 21.01, + "learning_rate": 0.0003333333333333333, + "loss": 4.1618, + "step": 600 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.62831445917696e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-600/training_args.bin b/checkpoint-600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..938bd31b67d4eb91525b5f5b946519126a3331c7 --- /dev/null +++ b/checkpoint-600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5ee64ec673c5915c725d59ab09d5567c896e983bbaa61ee563d03df6350d2f +size 3183 diff --git a/checkpoint-700/config.json b/checkpoint-700/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6e5b12aa45eb065a1dd1740d982095751bde6f0c --- /dev/null +++ b/checkpoint-700/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 60000 +} diff --git a/checkpoint-700/optimizer.pt b/checkpoint-700/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8acd5f63cdfc1a9f32a8d56870ce8c6b8eee6324 --- /dev/null +++ b/checkpoint-700/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8872c265c174ce861d50bf94274db1d17c091a25d41b36191e2bb5b868768bf1 +size 1055465009 diff --git a/checkpoint-700/pytorch_model.bin b/checkpoint-700/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..cbfbf79d37ea6e9f8c2dc3ec31f50091c7a13fc8 --- /dev/null +++ b/checkpoint-700/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a09de5eec0dc81a3a8907fd66b86480766efd64af87dab2f72195fadb7d50358 +size 540327017 diff --git a/checkpoint-700/rng_state.pth b/checkpoint-700/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2ee89f100087fffa8431034233f56f65a0edc97a --- 
/dev/null +++ b/checkpoint-700/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f266776bc0e53b9d5c806bd442aa7fe053b95aea4ab33698004df32736159520 +size 14567 diff --git a/checkpoint-700/scaler.pt b/checkpoint-700/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..12f2b8ec834e54a2bd7cfdd0e07b0c6e125b6490 --- /dev/null +++ b/checkpoint-700/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fb213daf5cce18a5f92167ca14da9df084d907f2b9796efc4666630f312b58c +size 559 diff --git a/checkpoint-700/scheduler.pt b/checkpoint-700/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..42408b6253265af34ae78746144fbba9316e0d7e --- /dev/null +++ b/checkpoint-700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2d4910fd408e002ebeff50d62bfb043dcae5ef658777d0c3ee4a3bbb515ec15 +size 623 diff --git a/checkpoint-700/special_tokens_map.json b/checkpoint-700/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-700/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-700/tokenizer_config.json b/checkpoint-700/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-700/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-700/trainer_state.json b/checkpoint-700/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..440da53c76f5088fec4ad519cb76384ee1db923a --- /dev/null +++ b/checkpoint-700/trainer_state.json @@ -0,0 +1,106 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 24.023333333333333, + "global_step": 700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0688, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.3635, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.4596, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 5.8337, + "step": 150 + }, + { + "epoch": 7.0, + "learning_rate": 0.0005555555555555556, + "loss": 5.4472, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 5.1974, + "step": 250 + }, + { + "epoch": 10.02, + "learning_rate": 0.0005, + "loss": 5.1091, + "step": 300 + }, + { + "epoch": 12.01, + "learning_rate": 0.00047222222222222224, + "loss": 4.9724, + "step": 350 + }, + { + "epoch": 14.01, + "learning_rate": 0.00044444444444444436, + "loss": 4.8054, + "step": 400 + }, + { + "epoch": 16.0, + "learning_rate": 0.00041666666666666664, + "loss": 4.5929, + "step": 450 + }, + { + "epoch": 17.02, + "learning_rate": 0.00038888888888888887, + "loss": 4.3775, + "step": 500 + }, + { + "epoch": 19.02, + "learning_rate": 0.0003611111111111111, + "loss": 4.2825, + "step": 550 + }, + { + "epoch": 21.01, + "learning_rate": 0.0003333333333333333, + "loss": 4.1618, + "step": 600 + }, + { + "epoch": 23.0, + "learning_rate": 0.00030555555555555555, + "loss": 4.0581, + "step": 650 + }, + { + "epoch": 24.02, + "learning_rate": 0.0002777777777777778, + "loss": 3.9257, + "step": 700 + } + ], + 
"max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.89915410202624e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-700/training_args.bin b/checkpoint-700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..938bd31b67d4eb91525b5f5b946519126a3331c7 --- /dev/null +++ b/checkpoint-700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5ee64ec673c5915c725d59ab09d5567c896e983bbaa61ee563d03df6350d2f +size 3183 diff --git a/checkpoint-800/config.json b/checkpoint-800/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6e5b12aa45eb065a1dd1740d982095751bde6f0c --- /dev/null +++ b/checkpoint-800/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 60000 +} diff --git a/checkpoint-800/optimizer.pt b/checkpoint-800/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..bf23728cb351bee6085063f83f7e803d6bedcda0 --- /dev/null +++ b/checkpoint-800/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dcc33dc6371cac10b1f4ded0b5d9ae040495cd15054ecb6286ee8d8581855b2 +size 1055465009 diff --git a/checkpoint-800/pytorch_model.bin b/checkpoint-800/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8ff7602257eb514bbfab54ed4325b0e250d6ff52 --- /dev/null +++ b/checkpoint-800/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:796c480aa700201501f9efb36334dfdb6c5f4a75fee6a62746cd882b356319e5 +size 540327017 diff --git a/checkpoint-800/rng_state.pth b/checkpoint-800/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..4e40cfc3fbe1cc16b2fbc38392d5a032ee9b540b --- /dev/null +++ b/checkpoint-800/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f510ca36d037b57f2390776274999ee0e099b5f6b5aeb15086ccebeaf6088086 +size 14567 diff --git a/checkpoint-800/scaler.pt b/checkpoint-800/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b3c73372264156b02df8dada2192ee3c96dd5fc4 --- /dev/null +++ b/checkpoint-800/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c2074cdcefbaa0a39f736d6b0f7bf018c350d49e85648bc8accc4f756ad816e +size 559 diff --git a/checkpoint-800/scheduler.pt b/checkpoint-800/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..75713dbce3771306ca00343ecc497c4f19a01d03 --- /dev/null +++ b/checkpoint-800/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b27fb255c84833fb6ab5d93679cb236a569de9a1c4f805f72a2f60a2bc7c7499 +size 623 diff 
--git a/checkpoint-800/special_tokens_map.json b/checkpoint-800/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-800/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-800/tokenizer_config.json b/checkpoint-800/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-800/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-800/trainer_state.json b/checkpoint-800/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5239ec925a79caa6e248c55926dda37b0b6d4500 --- /dev/null +++ b/checkpoint-800/trainer_state.json @@ -0,0 +1,118 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 28.013333333333332, + "global_step": 800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0688, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.3635, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.4596, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 5.8337, + "step": 150 + }, + { + "epoch": 7.0, + "learning_rate": 0.0005555555555555556, + "loss": 5.4472, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 5.1974, + "step": 250 + }, + { + "epoch": 10.02, + "learning_rate": 0.0005, + "loss": 5.1091, + "step": 300 + }, + { + "epoch": 12.01, + "learning_rate": 0.00047222222222222224, + "loss": 4.9724, + "step": 350 + }, + { + "epoch": 14.01, + "learning_rate": 0.00044444444444444436, + "loss": 4.8054, + "step": 400 + }, + { + "epoch": 16.0, + "learning_rate": 0.00041666666666666664, + "loss": 4.5929, + "step": 450 + }, + { + "epoch": 17.02, + "learning_rate": 0.00038888888888888887, + "loss": 4.3775, + "step": 500 + }, + { + "epoch": 19.02, + "learning_rate": 0.0003611111111111111, + "loss": 4.2825, + "step": 550 + }, + { + "epoch": 21.01, + "learning_rate": 0.0003333333333333333, + "loss": 4.1618, + "step": 600 + }, + { + "epoch": 23.0, + "learning_rate": 0.00030555555555555555, + "loss": 4.0581, + "step": 650 + }, + { + "epoch": 24.02, + "learning_rate": 0.0002777777777777778, + "loss": 3.9257, + "step": 700 + }, + { + "epoch": 26.02, + "learning_rate": 0.00025, + "loss": 3.8778, + "step": 750 + }, + { + "epoch": 28.01, + "learning_rate": 0.00022222222222222218, + "loss": 3.8028, + "step": 800 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.17108594556928e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-800/training_args.bin b/checkpoint-800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..938bd31b67d4eb91525b5f5b946519126a3331c7 --- /dev/null +++ b/checkpoint-800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5ee64ec673c5915c725d59ab09d5567c896e983bbaa61ee563d03df6350d2f +size 3183 diff --git a/checkpoint-900/config.json b/checkpoint-900/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6e5b12aa45eb065a1dd1740d982095751bde6f0c --- /dev/null +++ 
b/checkpoint-900/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 60000 +} diff --git a/checkpoint-900/optimizer.pt b/checkpoint-900/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..eb13926620a897fd7a3e3fe1e2cafe96274a486d --- /dev/null +++ b/checkpoint-900/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ee904189e840d3055fb9df0481bbea980bd899e212c65b225c12b1f7c89d2a1 +size 1055465009 diff --git a/checkpoint-900/pytorch_model.bin b/checkpoint-900/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..b2a0696b287d3ca806b4fcce004a18849fee54bc --- /dev/null +++ b/checkpoint-900/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5286140f5be68b266408d59b3408428deef852dc01e8949fe10d51f2a63c694 +size 540327017 diff --git a/checkpoint-900/rng_state.pth b/checkpoint-900/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0ccd96392de889fba3629af5b969915b826919c9 --- /dev/null +++ b/checkpoint-900/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b8cdadc1cbe1012009a181f7e4d9fc59995b08df28c2f6fb11bba7264c448bf +size 14567 diff --git a/checkpoint-900/scaler.pt b/checkpoint-900/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..be54cb13c777bc6feccb478ff218e7e21fad482a --- /dev/null +++ b/checkpoint-900/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8695f57df923e22b943b0b0f2b9cc7007008e80b53ccee275b3a35963fe67e9 +size 559 diff --git a/checkpoint-900/scheduler.pt b/checkpoint-900/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..539d7c83ea252818dc9cbffac08cf340bb05a454 --- /dev/null +++ b/checkpoint-900/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2cb09e5db72772a15094286e93cbb61d745d9b63863703cf53da0bcb9827821 +size 623 diff --git a/checkpoint-900/special_tokens_map.json b/checkpoint-900/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-900/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-900/tokenizer_config.json b/checkpoint-900/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-900/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-900/trainer_state.json b/checkpoint-900/trainer_state.json new file 
mode 100644 index 0000000000000000000000000000000000000000..722dee3ed169e3fc86d2537528cdd78868a104ac --- /dev/null +++ b/checkpoint-900/trainer_state.json @@ -0,0 +1,130 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 32.00333333333333, + "global_step": 900, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0688, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.3635, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.4596, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 5.8337, + "step": 150 + }, + { + "epoch": 7.0, + "learning_rate": 0.0005555555555555556, + "loss": 5.4472, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 5.1974, + "step": 250 + }, + { + "epoch": 10.02, + "learning_rate": 0.0005, + "loss": 5.1091, + "step": 300 + }, + { + "epoch": 12.01, + "learning_rate": 0.00047222222222222224, + "loss": 4.9724, + "step": 350 + }, + { + "epoch": 14.01, + "learning_rate": 0.00044444444444444436, + "loss": 4.8054, + "step": 400 + }, + { + "epoch": 16.0, + "learning_rate": 0.00041666666666666664, + "loss": 4.5929, + "step": 450 + }, + { + "epoch": 17.02, + "learning_rate": 0.00038888888888888887, + "loss": 4.3775, + "step": 500 + }, + { + "epoch": 19.02, + "learning_rate": 0.0003611111111111111, + "loss": 4.2825, + "step": 550 + }, + { + "epoch": 21.01, + "learning_rate": 0.0003333333333333333, + "loss": 4.1618, + "step": 600 + }, + { + "epoch": 23.0, + "learning_rate": 0.00030555555555555555, + "loss": 4.0581, + "step": 650 + }, + { + "epoch": 24.02, + "learning_rate": 0.0002777777777777778, + "loss": 3.9257, + "step": 700 + }, + { + "epoch": 26.02, + "learning_rate": 0.00025, + "loss": 3.8778, + "step": 750 + }, + { + "epoch": 28.01, + "learning_rate": 0.00022222222222222218, + "loss": 3.8028, + "step": 800 + }, + { + "epoch": 30.01, + "learning_rate": 0.00019444444444444443, + "loss": 3.7326, + "step": 850 + }, + { + "epoch": 32.0, + "learning_rate": 0.00016666666666666666, + "loss": 3.6708, + "step": 900 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.44301778911232e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-900/training_args.bin b/checkpoint-900/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..938bd31b67d4eb91525b5f5b946519126a3331c7 --- /dev/null +++ b/checkpoint-900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5ee64ec673c5915c725d59ab09d5567c896e983bbaa61ee563d03df6350d2f +size 3183 diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6e5b12aa45eb065a1dd1740d982095751bde6f0c --- /dev/null +++ b/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 
0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 60000 +} diff --git a/metrics.json b/metrics.json new file mode 100755 index 0000000000000000000000000000000000000000..4c4f8f22d29bac0bc26bb16dc75ea95c51979921 --- /dev/null +++ b/metrics.json @@ -0,0 +1,2503 @@ +{"num_parameters": 131922432, "trainable_parameters": 131922432, "step": 0} +{"train_info/time_between_train_steps": 3.2481327056884766, "step": 0} +{"info/global_step": 1, "train_info/time_within_train_step": 27.93870997428894, "step": 1} +{"train_info": {"train_info/memory_allocated": 2038.609375, "train_info/memory_max_allocated": 20597.66015625, "train_info/memory_reserved": 24020.0, "train_info/memory_max_reserved": 24020.0, "_timestamp": 1732943395, "_runtime": 33}, "step": 1} +{"logs": {"train/loss": 11.0688, "train/learning_rate": 4.9999999999999996e-06, "train/epoch": 0.0, "_timestamp": 1732943395, "_runtime": 33}, "step": 1} +{"train_info/time_between_train_steps": 0.007833480834960938, "step": 1} +{"info/global_step": 2, "train_info/time_within_train_step": 27.569900274276733, "step": 2} +{"train_info/time_between_train_steps": 0.0054166316986083984, "step": 2} +{"info/global_step": 3, "train_info/time_within_train_step": 27.526023149490356, "step": 3} +{"train_info/time_between_train_steps": 0.005384922027587891, "step": 3} +{"info/global_step": 4, "train_info/time_within_train_step": 27.606485843658447, "step": 4} +{"train_info/time_between_train_steps": 0.005772590637207031, "step": 4} +{"info/global_step": 5, "train_info/time_within_train_step": 27.377938985824585, "step": 5} +{"train_info/time_between_train_steps": 0.006208658218383789, "step": 5} +{"info/global_step": 6, "train_info/time_within_train_step": 27.57274556159973, "step": 6} +{"train_info/time_between_train_steps": 0.00630640983581543, "step": 6} +{"info/global_step": 7, "train_info/time_within_train_step": 27.57285714149475, "step": 7} +{"train_info/time_between_train_steps": 0.006185054779052734, "step": 7} +{"info/global_step": 8, "train_info/time_within_train_step": 27.52486300468445, "step": 8} +{"train_info/time_between_train_steps": 0.0060389041900634766, "step": 8} +{"info/global_step": 9, "train_info/time_within_train_step": 27.49018955230713, "step": 9} +{"train_info/time_between_train_steps": 0.010289669036865234, "step": 9} +{"info/global_step": 10, "train_info/time_within_train_step": 27.377405881881714, "step": 10} +{"train_info/time_between_train_steps": 0.0052089691162109375, "step": 10} +{"info/global_step": 11, "train_info/time_within_train_step": 27.384013414382935, "step": 11} +{"train_info/time_between_train_steps": 0.005136966705322266, "step": 11} +{"info/global_step": 12, "train_info/time_within_train_step": 27.48194193840027, "step": 12} +{"train_info/time_between_train_steps": 0.00520777702331543, "step": 12} +{"info/global_step": 13, "train_info/time_within_train_step": 27.471953630447388, "step": 13} +{"train_info/time_between_train_steps": 0.005706071853637695, "step": 13} +{"info/global_step": 14, "train_info/time_within_train_step": 27.425342321395874, "step": 14} +{"train_info/time_between_train_steps": 0.005240201950073242, "step": 14} +{"info/global_step": 15, "train_info/time_within_train_step": 27.570478677749634, "step": 15} +{"train_info/time_between_train_steps": 
0.0055043697357177734, "step": 15} +{"info/global_step": 16, "train_info/time_within_train_step": 27.55572819709778, "step": 16} +{"train_info/time_between_train_steps": 0.005234956741333008, "step": 16} +{"info/global_step": 17, "train_info/time_within_train_step": 27.51678776741028, "step": 17} +{"train_info/time_between_train_steps": 0.010654211044311523, "step": 17} +{"info/global_step": 18, "train_info/time_within_train_step": 27.376811265945435, "step": 18} +{"train_info/time_between_train_steps": 0.010251998901367188, "step": 18} +{"info/global_step": 19, "train_info/time_within_train_step": 27.399075269699097, "step": 19} +{"train_info/time_between_train_steps": 0.006384372711181641, "step": 19} +{"info/global_step": 20, "train_info/time_within_train_step": 27.485938787460327, "step": 20} +{"train_info/time_between_train_steps": 0.016405820846557617, "step": 20} +{"info/global_step": 21, "train_info/time_within_train_step": 27.50249457359314, "step": 21} +{"train_info/time_between_train_steps": 0.015149831771850586, "step": 21} +{"info/global_step": 22, "train_info/time_within_train_step": 27.60693097114563, "step": 22} +{"train_info/time_between_train_steps": 0.009699821472167969, "step": 22} +{"info/global_step": 23, "train_info/time_within_train_step": 27.527070999145508, "step": 23} +{"train_info/time_between_train_steps": 0.0053157806396484375, "step": 23} +{"info/global_step": 24, "train_info/time_within_train_step": 27.391297101974487, "step": 24} +{"train_info/time_between_train_steps": 0.005268573760986328, "step": 24} +{"info/global_step": 25, "train_info/time_within_train_step": 27.464459657669067, "step": 25} +{"train_info/time_between_train_steps": 0.005558967590332031, "step": 25} +{"info/global_step": 26, "train_info/time_within_train_step": 27.390349864959717, "step": 26} +{"train_info/time_between_train_steps": 0.01054072380065918, "step": 26} +{"info/global_step": 27, "train_info/time_within_train_step": 27.41256284713745, "step": 27} +{"train_info/time_between_train_steps": 0.014928817749023438, "step": 27} +{"info/global_step": 28, "train_info/time_within_train_step": 27.499849319458008, "step": 28} +{"train_info/time_between_train_steps": 0.006158590316772461, "step": 28} +{"train_info/time_between_train_steps": 14.70653247833252, "step": 28} +{"info/global_step": 29, "train_info/time_within_train_step": 27.411985158920288, "step": 29} +{"train_info/time_between_train_steps": 0.0056421756744384766, "step": 29} +{"info/global_step": 30, "train_info/time_within_train_step": 27.719777822494507, "step": 30} +{"train_info/time_between_train_steps": 0.005839347839355469, "step": 30} +{"info/global_step": 31, "train_info/time_within_train_step": 27.549137353897095, "step": 31} +{"train_info/time_between_train_steps": 0.0054666996002197266, "step": 31} +{"info/global_step": 32, "train_info/time_within_train_step": 27.58830213546753, "step": 32} +{"train_info/time_between_train_steps": 0.0054759979248046875, "step": 32} +{"info/global_step": 33, "train_info/time_within_train_step": 27.480273485183716, "step": 33} +{"train_info/time_between_train_steps": 0.006581544876098633, "step": 33} +{"info/global_step": 34, "train_info/time_within_train_step": 27.7089900970459, "step": 34} +{"train_info/time_between_train_steps": 0.005475759506225586, "step": 34} +{"info/global_step": 35, "train_info/time_within_train_step": 27.473073720932007, "step": 35} +{"train_info/time_between_train_steps": 0.009040355682373047, "step": 35} +{"info/global_step": 36, 
"train_info/time_within_train_step": 27.87011742591858, "step": 36} +{"train_info/time_between_train_steps": 0.005404949188232422, "step": 36} +{"info/global_step": 37, "train_info/time_within_train_step": 27.57777976989746, "step": 37} +{"train_info/time_between_train_steps": 0.005305767059326172, "step": 37} +{"info/global_step": 38, "train_info/time_within_train_step": 27.489116191864014, "step": 38} +{"train_info/time_between_train_steps": 0.009084701538085938, "step": 38} +{"info/global_step": 39, "train_info/time_within_train_step": 27.389114141464233, "step": 39} +{"train_info/time_between_train_steps": 0.010072946548461914, "step": 39} +{"info/global_step": 40, "train_info/time_within_train_step": 27.51569437980652, "step": 40} +{"train_info/time_between_train_steps": 0.005259990692138672, "step": 40} +{"info/global_step": 41, "train_info/time_within_train_step": 27.42319631576538, "step": 41} +{"train_info/time_between_train_steps": 0.005549907684326172, "step": 41} +{"info/global_step": 42, "train_info/time_within_train_step": 27.60444402694702, "step": 42} +{"train_info/time_between_train_steps": 0.0052471160888671875, "step": 42} +{"info/global_step": 43, "train_info/time_within_train_step": 27.36705255508423, "step": 43} +{"train_info/time_between_train_steps": 0.0053026676177978516, "step": 43} +{"info/global_step": 44, "train_info/time_within_train_step": 27.358172178268433, "step": 44} +{"train_info/time_between_train_steps": 0.011718511581420898, "step": 44} +{"info/global_step": 45, "train_info/time_within_train_step": 27.426854133605957, "step": 45} +{"train_info/time_between_train_steps": 0.005272388458251953, "step": 45} +{"info/global_step": 46, "train_info/time_within_train_step": 27.364497900009155, "step": 46} +{"train_info/time_between_train_steps": 0.015349149703979492, "step": 46} +{"info/global_step": 47, "train_info/time_within_train_step": 27.493669033050537, "step": 47} +{"train_info/time_between_train_steps": 0.010812997817993164, "step": 47} +{"info/global_step": 48, "train_info/time_within_train_step": 27.373873949050903, "step": 48} +{"train_info/time_between_train_steps": 0.005320549011230469, "step": 48} +{"info/global_step": 49, "train_info/time_within_train_step": 27.353699684143066, "step": 49} +{"train_info/time_between_train_steps": 0.005160808563232422, "step": 49} +{"info/global_step": 50, "train_info/time_within_train_step": 27.502938270568848, "step": 50} +{"train_info": {"train_info/memory_allocated": 2038.609375, "train_info/memory_max_allocated": 21606.1669921875, "train_info/memory_reserved": 24044.0, "train_info/memory_max_reserved": 24044.0, "_timestamp": 1732944758, "_runtime": 1396}, "step": 50} +{"logs": {"train/loss": 8.3635, "train/learning_rate": 0.00025, "train/epoch": 1.02, "_timestamp": 1732944758, "_runtime": 1396}, "step": 50} +{"train_info/time_between_train_steps": 0.00728154182434082, "step": 50} +{"info/global_step": 51, "train_info/time_within_train_step": 27.668228149414062, "step": 51} +{"train_info/time_between_train_steps": 0.005288124084472656, "step": 51} +{"info/global_step": 52, "train_info/time_within_train_step": 27.531705856323242, "step": 52} +{"train_info/time_between_train_steps": 0.010514497756958008, "step": 52} +{"info/global_step": 53, "train_info/time_within_train_step": 27.367364168167114, "step": 53} +{"train_info/time_between_train_steps": 0.005805492401123047, "step": 53} +{"info/global_step": 54, "train_info/time_within_train_step": 27.46654748916626, "step": 54} 
+{"train_info/time_between_train_steps": 0.0054476261138916016, "step": 54} +{"info/global_step": 55, "train_info/time_within_train_step": 27.399053812026978, "step": 55} +{"train_info/time_between_train_steps": 0.005414485931396484, "step": 55} +{"info/global_step": 56, "train_info/time_within_train_step": 27.392711877822876, "step": 56} +{"train_info/time_between_train_steps": 0.005830287933349609, "step": 56} +{"train_info/time_between_train_steps": 14.365455627441406, "step": 56} +{"info/global_step": 57, "train_info/time_within_train_step": 27.355294227600098, "step": 57} +{"train_info/time_between_train_steps": 0.010822296142578125, "step": 57} +{"info/global_step": 58, "train_info/time_within_train_step": 27.4760639667511, "step": 58} +{"train_info/time_between_train_steps": 0.005262851715087891, "step": 58} +{"info/global_step": 59, "train_info/time_within_train_step": 27.39692974090576, "step": 59} +{"train_info/time_between_train_steps": 0.005693674087524414, "step": 59} +{"info/global_step": 60, "train_info/time_within_train_step": 27.640864610671997, "step": 60} +{"train_info/time_between_train_steps": 0.005303859710693359, "step": 60} +{"info/global_step": 61, "train_info/time_within_train_step": 27.38863706588745, "step": 61} +{"train_info/time_between_train_steps": 0.010818243026733398, "step": 61} +{"info/global_step": 62, "train_info/time_within_train_step": 27.61141037940979, "step": 62} +{"train_info/time_between_train_steps": 0.005690813064575195, "step": 62} +{"info/global_step": 63, "train_info/time_within_train_step": 27.447561740875244, "step": 63} +{"train_info/time_between_train_steps": 0.0062923431396484375, "step": 63} +{"info/global_step": 64, "train_info/time_within_train_step": 27.626927852630615, "step": 64} +{"train_info/time_between_train_steps": 0.006235361099243164, "step": 64} +{"info/global_step": 65, "train_info/time_within_train_step": 27.54359269142151, "step": 65} +{"train_info/time_between_train_steps": 0.007029533386230469, "step": 65} +{"info/global_step": 66, "train_info/time_within_train_step": 27.481492280960083, "step": 66} +{"train_info/time_between_train_steps": 0.005931854248046875, "step": 66} +{"info/global_step": 67, "train_info/time_within_train_step": 27.447484254837036, "step": 67} +{"train_info/time_between_train_steps": 0.005497455596923828, "step": 67} +{"info/global_step": 68, "train_info/time_within_train_step": 27.499680280685425, "step": 68} +{"train_info/time_between_train_steps": 0.01078343391418457, "step": 68} +{"info/global_step": 69, "train_info/time_within_train_step": 27.62116813659668, "step": 69} +{"train_info/time_between_train_steps": 0.0057027339935302734, "step": 69} +{"info/global_step": 70, "train_info/time_within_train_step": 27.47365975379944, "step": 70} +{"train_info/time_between_train_steps": 0.010885477066040039, "step": 70} +{"info/global_step": 71, "train_info/time_within_train_step": 27.47200608253479, "step": 71} +{"train_info/time_between_train_steps": 0.0063779354095458984, "step": 71} +{"info/global_step": 72, "train_info/time_within_train_step": 27.439193725585938, "step": 72} +{"train_info/time_between_train_steps": 0.005562543869018555, "step": 72} +{"info/global_step": 73, "train_info/time_within_train_step": 27.4512677192688, "step": 73} +{"train_info/time_between_train_steps": 0.007397890090942383, "step": 73} +{"info/global_step": 74, "train_info/time_within_train_step": 27.492368936538696, "step": 74} +{"train_info/time_between_train_steps": 0.005574464797973633, "step": 74} 
+{"info/global_step": 75, "train_info/time_within_train_step": 27.430750370025635, "step": 75} +{"train_info/time_between_train_steps": 0.005820751190185547, "step": 75} +{"info/global_step": 76, "train_info/time_within_train_step": 27.43212342262268, "step": 76} +{"train_info/time_between_train_steps": 0.010974884033203125, "step": 76} +{"info/global_step": 77, "train_info/time_within_train_step": 27.514796257019043, "step": 77} +{"train_info/time_between_train_steps": 0.005789756774902344, "step": 77} +{"info/global_step": 78, "train_info/time_within_train_step": 27.61381983757019, "step": 78} +{"train_info/time_between_train_steps": 0.005590915679931641, "step": 78} +{"info/global_step": 79, "train_info/time_within_train_step": 27.443655967712402, "step": 79} +{"train_info/time_between_train_steps": 0.005658626556396484, "step": 79} +{"info/global_step": 80, "train_info/time_within_train_step": 27.50606679916382, "step": 80} +{"train_info/time_between_train_steps": 0.006415843963623047, "step": 80} +{"info/global_step": 81, "train_info/time_within_train_step": 27.438836812973022, "step": 81} +{"train_info/time_between_train_steps": 0.005750894546508789, "step": 81} +{"info/global_step": 82, "train_info/time_within_train_step": 27.560670375823975, "step": 82} +{"train_info/time_between_train_steps": 0.005936145782470703, "step": 82} +{"info/global_step": 83, "train_info/time_within_train_step": 27.559638261795044, "step": 83} +{"train_info/time_between_train_steps": 0.011141300201416016, "step": 83} +{"info/global_step": 84, "train_info/time_within_train_step": 27.53143000602722, "step": 84} +{"train_info/time_between_train_steps": 0.006252765655517578, "step": 84} +{"train_info/time_between_train_steps": 14.674840927124023, "step": 84} +{"info/global_step": 85, "train_info/time_within_train_step": 27.404929637908936, "step": 85} +{"train_info/time_between_train_steps": 0.008108139038085938, "step": 85} +{"info/global_step": 86, "train_info/time_within_train_step": 27.869394779205322, "step": 86} +{"train_info/time_between_train_steps": 0.006314277648925781, "step": 86} +{"info/global_step": 87, "train_info/time_within_train_step": 27.40464973449707, "step": 87} +{"train_info/time_between_train_steps": 0.006424665451049805, "step": 87} +{"info/global_step": 88, "train_info/time_within_train_step": 27.600124835968018, "step": 88} +{"train_info/time_between_train_steps": 0.006414890289306641, "step": 88} +{"info/global_step": 89, "train_info/time_within_train_step": 27.407828092575073, "step": 89} +{"train_info/time_between_train_steps": 0.007957696914672852, "step": 89} +{"info/global_step": 90, "train_info/time_within_train_step": 27.572468519210815, "step": 90} +{"train_info/time_between_train_steps": 0.0061321258544921875, "step": 90} +{"info/global_step": 91, "train_info/time_within_train_step": 27.40745520591736, "step": 91} +{"train_info/time_between_train_steps": 0.006437540054321289, "step": 91} +{"info/global_step": 92, "train_info/time_within_train_step": 27.515557765960693, "step": 92} +{"train_info/time_between_train_steps": 0.006302356719970703, "step": 92} +{"info/global_step": 93, "train_info/time_within_train_step": 27.757561206817627, "step": 93} +{"train_info/time_between_train_steps": 0.01336216926574707, "step": 93} +{"info/global_step": 94, "train_info/time_within_train_step": 27.397241353988647, "step": 94} +{"train_info/time_between_train_steps": 0.005925416946411133, "step": 94} +{"info/global_step": 95, "train_info/time_within_train_step": 27.63575053215027, 
"step": 95} +{"train_info/time_between_train_steps": 0.00569462776184082, "step": 95} +{"info/global_step": 96, "train_info/time_within_train_step": 27.395448684692383, "step": 96} +{"train_info/time_between_train_steps": 0.005769491195678711, "step": 96} +{"info/global_step": 97, "train_info/time_within_train_step": 27.47964119911194, "step": 97} +{"train_info/time_between_train_steps": 0.005455493927001953, "step": 97} +{"info/global_step": 98, "train_info/time_within_train_step": 27.39879274368286, "step": 98} +{"train_info/time_between_train_steps": 0.005789756774902344, "step": 98} +{"info/global_step": 99, "train_info/time_within_train_step": 27.418116569519043, "step": 99} +{"train_info/time_between_train_steps": 0.005956172943115234, "step": 99} +{"info/global_step": 100, "train_info/time_within_train_step": 27.62036943435669, "step": 100} +{"train_info": {"train_info/memory_allocated": 2038.609375, "train_info/memory_max_allocated": 21606.1669921875, "train_info/memory_reserved": 24044.0, "train_info/memory_max_reserved": 24044.0, "_timestamp": 1732946162, "_runtime": 2800}, "step": 100} +{"logs": {"train/loss": 6.4596, "train/learning_rate": 0.0005, "train/epoch": 3.01, "_timestamp": 1732946162, "_runtime": 2800}, "step": 100} +{"train_info/time_between_train_steps": 2.526287078857422, "step": 100} +{"info/global_step": 101, "train_info/time_within_train_step": 27.38373613357544, "step": 101} +{"train_info/time_between_train_steps": 0.005713701248168945, "step": 101} +{"info/global_step": 102, "train_info/time_within_train_step": 27.61883020401001, "step": 102} +{"train_info/time_between_train_steps": 0.006597042083740234, "step": 102} +{"info/global_step": 103, "train_info/time_within_train_step": 27.399724006652832, "step": 103} +{"train_info/time_between_train_steps": 0.005891561508178711, "step": 103} +{"info/global_step": 104, "train_info/time_within_train_step": 27.52633762359619, "step": 104} +{"train_info/time_between_train_steps": 0.011039495468139648, "step": 104} +{"info/global_step": 105, "train_info/time_within_train_step": 27.45848035812378, "step": 105} +{"train_info/time_between_train_steps": 0.005722761154174805, "step": 105} +{"info/global_step": 106, "train_info/time_within_train_step": 27.517287015914917, "step": 106} +{"train_info/time_between_train_steps": 0.0055959224700927734, "step": 106} +{"info/global_step": 107, "train_info/time_within_train_step": 27.387986421585083, "step": 107} +{"train_info/time_between_train_steps": 0.005773067474365234, "step": 107} +{"info/global_step": 108, "train_info/time_within_train_step": 27.38632035255432, "step": 108} +{"train_info/time_between_train_steps": 0.005751848220825195, "step": 108} +{"info/global_step": 109, "train_info/time_within_train_step": 27.653902769088745, "step": 109} +{"train_info/time_between_train_steps": 0.00701451301574707, "step": 109} +{"info/global_step": 110, "train_info/time_within_train_step": 27.41575574874878, "step": 110} +{"train_info/time_between_train_steps": 0.005650520324707031, "step": 110} +{"info/global_step": 111, "train_info/time_within_train_step": 27.42734718322754, "step": 111} +{"train_info/time_between_train_steps": 0.005734682083129883, "step": 111} +{"info/global_step": 112, "train_info/time_within_train_step": 27.41632604598999, "step": 112} +{"train_info/time_between_train_steps": 0.006381034851074219, "step": 112} +{"train_info/time_between_train_steps": 14.666259765625, "step": 112} +{"info/global_step": 113, "train_info/time_within_train_step": 27.390249967575073, 
"step": 113} +{"train_info/time_between_train_steps": 0.005196332931518555, "step": 113} +{"info/global_step": 114, "train_info/time_within_train_step": 27.453588008880615, "step": 114} +{"train_info/time_between_train_steps": 0.005092144012451172, "step": 114} +{"info/global_step": 115, "train_info/time_within_train_step": 27.386144876480103, "step": 115} +{"train_info/time_between_train_steps": 0.005440950393676758, "step": 115} +{"info/global_step": 116, "train_info/time_within_train_step": 27.513628005981445, "step": 116} +{"train_info/time_between_train_steps": 0.005449533462524414, "step": 116} +{"info/global_step": 117, "train_info/time_within_train_step": 27.368239402770996, "step": 117} +{"train_info/time_between_train_steps": 0.005200862884521484, "step": 117} +{"info/global_step": 118, "train_info/time_within_train_step": 27.561086893081665, "step": 118} +{"train_info/time_between_train_steps": 0.0061187744140625, "step": 118} +{"info/global_step": 119, "train_info/time_within_train_step": 27.36002016067505, "step": 119} +{"train_info/time_between_train_steps": 0.005240917205810547, "step": 119} +{"info/global_step": 120, "train_info/time_within_train_step": 27.42653465270996, "step": 120} +{"train_info/time_between_train_steps": 0.0052814483642578125, "step": 120} +{"info/global_step": 121, "train_info/time_within_train_step": 27.555333614349365, "step": 121} +{"train_info/time_between_train_steps": 0.01020193099975586, "step": 121} +{"info/global_step": 122, "train_info/time_within_train_step": 27.373188257217407, "step": 122} +{"train_info/time_between_train_steps": 0.005102634429931641, "step": 122} +{"info/global_step": 123, "train_info/time_within_train_step": 27.445774793624878, "step": 123} +{"train_info/time_between_train_steps": 0.009330511093139648, "step": 123} +{"info/global_step": 124, "train_info/time_within_train_step": 27.488176345825195, "step": 124} +{"train_info/time_between_train_steps": 0.005277395248413086, "step": 124} +{"info/global_step": 125, "train_info/time_within_train_step": 27.362690687179565, "step": 125} +{"train_info/time_between_train_steps": 0.0055925846099853516, "step": 125} +{"info/global_step": 126, "train_info/time_within_train_step": 27.407543182373047, "step": 126} +{"train_info/time_between_train_steps": 0.005444049835205078, "step": 126} +{"info/global_step": 127, "train_info/time_within_train_step": 27.44820547103882, "step": 127} +{"train_info/time_between_train_steps": 0.0053098201751708984, "step": 127} +{"info/global_step": 128, "train_info/time_within_train_step": 27.387826919555664, "step": 128} +{"train_info/time_between_train_steps": 0.0052874088287353516, "step": 128} +{"info/global_step": 129, "train_info/time_within_train_step": 27.347395420074463, "step": 129} +{"train_info/time_between_train_steps": 0.010198354721069336, "step": 129} +{"info/global_step": 130, "train_info/time_within_train_step": 27.433371782302856, "step": 130} +{"train_info/time_between_train_steps": 0.005324840545654297, "step": 130} +{"info/global_step": 131, "train_info/time_within_train_step": 27.380563974380493, "step": 131} +{"train_info/time_between_train_steps": 0.005310535430908203, "step": 131} +{"info/global_step": 132, "train_info/time_within_train_step": 27.35117220878601, "step": 132} +{"train_info/time_between_train_steps": 0.005366802215576172, "step": 132} +{"info/global_step": 133, "train_info/time_within_train_step": 27.769594430923462, "step": 133} +{"train_info/time_between_train_steps": 0.010472774505615234, "step": 133} 
+{"info/global_step": 134, "train_info/time_within_train_step": 27.416422843933105, "step": 134} +{"train_info/time_between_train_steps": 0.005380392074584961, "step": 134} +{"info/global_step": 135, "train_info/time_within_train_step": 27.655356645584106, "step": 135} +{"train_info/time_between_train_steps": 0.005352497100830078, "step": 135} +{"info/global_step": 136, "train_info/time_within_train_step": 27.37032723426819, "step": 136} +{"train_info/time_between_train_steps": 0.005289554595947266, "step": 136} +{"info/global_step": 137, "train_info/time_within_train_step": 27.527177572250366, "step": 137} +{"train_info/time_between_train_steps": 0.005431175231933594, "step": 137} +{"info/global_step": 138, "train_info/time_within_train_step": 27.363161325454712, "step": 138} +{"train_info/time_between_train_steps": 0.005415439605712891, "step": 138} +{"info/global_step": 139, "train_info/time_within_train_step": 27.48947763442993, "step": 139} +{"train_info/time_between_train_steps": 0.005631685256958008, "step": 139} +{"info/global_step": 140, "train_info/time_within_train_step": 27.560516357421875, "step": 140} +{"train_info/time_between_train_steps": 0.011068344116210938, "step": 140} +{"train_info/time_between_train_steps": 14.444876909255981, "step": 140} +{"info/global_step": 141, "train_info/time_within_train_step": 27.370031356811523, "step": 141} +{"train_info/time_between_train_steps": 0.00549626350402832, "step": 141} +{"info/global_step": 142, "train_info/time_within_train_step": 27.75755500793457, "step": 142} +{"train_info/time_between_train_steps": 0.0054624080657958984, "step": 142} +{"info/global_step": 143, "train_info/time_within_train_step": 27.401529550552368, "step": 143} +{"train_info/time_between_train_steps": 0.01029658317565918, "step": 143} +{"info/global_step": 144, "train_info/time_within_train_step": 27.594818592071533, "step": 144} +{"train_info/time_between_train_steps": 0.013385534286499023, "step": 144} +{"info/global_step": 145, "train_info/time_within_train_step": 27.412445306777954, "step": 145} +{"train_info/time_between_train_steps": 0.005923748016357422, "step": 145} +{"info/global_step": 146, "train_info/time_within_train_step": 27.607998609542847, "step": 146} +{"train_info/time_between_train_steps": 0.013180255889892578, "step": 146} +{"info/global_step": 147, "train_info/time_within_train_step": 27.436535596847534, "step": 147} +{"train_info/time_between_train_steps": 0.006016731262207031, "step": 147} +{"info/global_step": 148, "train_info/time_within_train_step": 27.63032841682434, "step": 148} +{"train_info/time_between_train_steps": 0.005571842193603516, "step": 148} +{"info/global_step": 149, "train_info/time_within_train_step": 27.460343837738037, "step": 149} +{"train_info/time_between_train_steps": 0.005357027053833008, "step": 149} +{"info/global_step": 150, "train_info/time_within_train_step": 27.361629247665405, "step": 150} +{"train_info": {"train_info/memory_allocated": 2038.609375, "train_info/memory_max_allocated": 21606.1669921875, "train_info/memory_reserved": 24044.0, "train_info/memory_max_reserved": 24044.0, "_timestamp": 1732947567, "_runtime": 4205}, "step": 150} +{"logs": {"train/loss": 5.8337, "train/learning_rate": 0.0005833333333333333, "train/epoch": 5.01, "_timestamp": 1732947567, "_runtime": 4205}, "step": 150} +{"train_info/time_between_train_steps": 0.007359027862548828, "step": 150} +{"info/global_step": 151, "train_info/time_within_train_step": 27.37695550918579, "step": 151} 
+{"train_info/time_between_train_steps": 0.005386829376220703, "step": 151} +{"info/global_step": 152, "train_info/time_within_train_step": 27.35835576057434, "step": 152} +{"train_info/time_between_train_steps": 0.005327701568603516, "step": 152} +{"info/global_step": 153, "train_info/time_within_train_step": 27.704925537109375, "step": 153} +{"train_info/time_between_train_steps": 0.010222196578979492, "step": 153} +{"info/global_step": 154, "train_info/time_within_train_step": 27.386279582977295, "step": 154} +{"train_info/time_between_train_steps": 0.0051860809326171875, "step": 154} +{"info/global_step": 155, "train_info/time_within_train_step": 27.484529495239258, "step": 155} +{"train_info/time_between_train_steps": 0.005605459213256836, "step": 155} +{"info/global_step": 156, "train_info/time_within_train_step": 27.38252544403076, "step": 156} +{"train_info/time_between_train_steps": 0.005482673645019531, "step": 156} +{"info/global_step": 157, "train_info/time_within_train_step": 27.340068101882935, "step": 157} +{"train_info/time_between_train_steps": 0.005243062973022461, "step": 157} +{"info/global_step": 158, "train_info/time_within_train_step": 27.353280782699585, "step": 158} +{"train_info/time_between_train_steps": 0.005550861358642578, "step": 158} +{"info/global_step": 159, "train_info/time_within_train_step": 27.34284496307373, "step": 159} +{"train_info/time_between_train_steps": 0.011708259582519531, "step": 159} +{"info/global_step": 160, "train_info/time_within_train_step": 27.57379460334778, "step": 160} +{"train_info/time_between_train_steps": 0.013377904891967773, "step": 160} +{"info/global_step": 161, "train_info/time_within_train_step": 27.36777663230896, "step": 161} +{"train_info/time_between_train_steps": 0.005507707595825195, "step": 161} +{"info/global_step": 162, "train_info/time_within_train_step": 27.368560075759888, "step": 162} +{"train_info/time_between_train_steps": 0.005593061447143555, "step": 162} +{"info/global_step": 163, "train_info/time_within_train_step": 27.412662267684937, "step": 163} +{"train_info/time_between_train_steps": 0.005182504653930664, "step": 163} +{"info/global_step": 164, "train_info/time_within_train_step": 27.433250665664673, "step": 164} +{"train_info/time_between_train_steps": 0.005300283432006836, "step": 164} +{"info/global_step": 165, "train_info/time_within_train_step": 27.372089624404907, "step": 165} +{"train_info/time_between_train_steps": 0.005474567413330078, "step": 165} +{"info/global_step": 166, "train_info/time_within_train_step": 27.45999503135681, "step": 166} +{"train_info/time_between_train_steps": 0.0061991214752197266, "step": 166} +{"info/global_step": 167, "train_info/time_within_train_step": 27.41047501564026, "step": 167} +{"train_info/time_between_train_steps": 0.006076812744140625, "step": 167} +{"info/global_step": 168, "train_info/time_within_train_step": 27.412220239639282, "step": 168} +{"train_info/time_between_train_steps": 0.011171102523803711, "step": 168} +{"train_info/time_between_train_steps": 14.632018566131592, "step": 168} +{"info/global_step": 169, "train_info/time_within_train_step": 27.348656177520752, "step": 169} +{"train_info/time_between_train_steps": 0.005349397659301758, "step": 169} +{"info/global_step": 170, "train_info/time_within_train_step": 27.59303307533264, "step": 170} +{"train_info/time_between_train_steps": 0.0053408145904541016, "step": 170} +{"info/global_step": 171, "train_info/time_within_train_step": 27.40922451019287, "step": 171} 
+{"train_info/time_between_train_steps": 0.005159854888916016, "step": 171} +{"info/global_step": 172, "train_info/time_within_train_step": 27.500378847122192, "step": 172} +{"train_info/time_between_train_steps": 0.005567073822021484, "step": 172} +{"info/global_step": 173, "train_info/time_within_train_step": 27.353158235549927, "step": 173} +{"train_info/time_between_train_steps": 0.00529170036315918, "step": 173} +{"info/global_step": 174, "train_info/time_within_train_step": 27.596213579177856, "step": 174} +{"train_info/time_between_train_steps": 0.00564122200012207, "step": 174} +{"info/global_step": 175, "train_info/time_within_train_step": 27.349876403808594, "step": 175} +{"train_info/time_between_train_steps": 0.005203723907470703, "step": 175} +{"info/global_step": 176, "train_info/time_within_train_step": 27.581763744354248, "step": 176} +{"train_info/time_between_train_steps": 0.008880615234375, "step": 176} +{"info/global_step": 177, "train_info/time_within_train_step": 27.434614181518555, "step": 177} +{"train_info/time_between_train_steps": 0.01051187515258789, "step": 177} +{"info/global_step": 178, "train_info/time_within_train_step": 27.381038665771484, "step": 178} +{"train_info/time_between_train_steps": 0.01006460189819336, "step": 178} +{"info/global_step": 179, "train_info/time_within_train_step": 27.5493106842041, "step": 179} +{"train_info/time_between_train_steps": 0.005284547805786133, "step": 179} +{"info/global_step": 180, "train_info/time_within_train_step": 27.367424249649048, "step": 180} +{"train_info/time_between_train_steps": 0.0050678253173828125, "step": 180} +{"info/global_step": 181, "train_info/time_within_train_step": 27.36330795288086, "step": 181} +{"train_info/time_between_train_steps": 0.005684852600097656, "step": 181} +{"info/global_step": 182, "train_info/time_within_train_step": 27.4117431640625, "step": 182} +{"train_info/time_between_train_steps": 0.0052585601806640625, "step": 182} +{"info/global_step": 183, "train_info/time_within_train_step": 27.41340446472168, "step": 183} +{"train_info/time_between_train_steps": 0.011398553848266602, "step": 183} +{"info/global_step": 184, "train_info/time_within_train_step": 27.452045917510986, "step": 184} +{"train_info/time_between_train_steps": 0.010294198989868164, "step": 184} +{"info/global_step": 185, "train_info/time_within_train_step": 27.457027196884155, "step": 185} +{"train_info/time_between_train_steps": 0.0050542354583740234, "step": 185} +{"info/global_step": 186, "train_info/time_within_train_step": 27.366002559661865, "step": 186} +{"train_info/time_between_train_steps": 0.010285377502441406, "step": 186} +{"info/global_step": 187, "train_info/time_within_train_step": 27.469005346298218, "step": 187} +{"train_info/time_between_train_steps": 0.010203123092651367, "step": 187} +{"info/global_step": 188, "train_info/time_within_train_step": 27.482348680496216, "step": 188} +{"train_info/time_between_train_steps": 0.005135297775268555, "step": 188} +{"info/global_step": 189, "train_info/time_within_train_step": 27.511562824249268, "step": 189} +{"train_info/time_between_train_steps": 0.005368947982788086, "step": 189} +{"info/global_step": 190, "train_info/time_within_train_step": 27.456120491027832, "step": 190} +{"train_info/time_between_train_steps": 0.005280494689941406, "step": 190} +{"info/global_step": 191, "train_info/time_within_train_step": 27.805620670318604, "step": 191} +{"train_info/time_between_train_steps": 0.00522613525390625, "step": 191} +{"info/global_step": 192, 
"train_info/time_within_train_step": 27.56040334701538, "step": 192} +{"train_info/time_between_train_steps": 0.01047515869140625, "step": 192} +{"info/global_step": 193, "train_info/time_within_train_step": 27.576990365982056, "step": 193} +{"train_info/time_between_train_steps": 0.009924173355102539, "step": 193} +{"info/global_step": 194, "train_info/time_within_train_step": 27.354443073272705, "step": 194} +{"train_info/time_between_train_steps": 0.005421638488769531, "step": 194} +{"info/global_step": 195, "train_info/time_within_train_step": 27.581568241119385, "step": 195} +{"train_info/time_between_train_steps": 0.0055408477783203125, "step": 195} +{"info/global_step": 196, "train_info/time_within_train_step": 27.378737926483154, "step": 196} +{"train_info/time_between_train_steps": 0.016100168228149414, "step": 196} +{"train_info/time_between_train_steps": 14.616089820861816, "step": 196} +{"info/global_step": 197, "train_info/time_within_train_step": 27.347873210906982, "step": 197} +{"train_info/time_between_train_steps": 0.010765790939331055, "step": 197} +{"info/global_step": 198, "train_info/time_within_train_step": 27.500903367996216, "step": 198} +{"train_info/time_between_train_steps": 0.013489484786987305, "step": 198} +{"info/global_step": 199, "train_info/time_within_train_step": 27.55465054512024, "step": 199} +{"train_info/time_between_train_steps": 0.00630950927734375, "step": 199} +{"info/global_step": 200, "train_info/time_within_train_step": 27.48155117034912, "step": 200} +{"train_info": {"train_info/memory_allocated": 2038.609375, "train_info/memory_max_allocated": 21606.1669921875, "train_info/memory_reserved": 24044.0, "train_info/memory_max_reserved": 24044.0, "_timestamp": 1732948970, "_runtime": 5608}, "step": 200} +{"logs": {"train/loss": 5.4472, "train/learning_rate": 0.0005555555555555556, "train/epoch": 7.0, "_timestamp": 1732948970, "_runtime": 5608}, "step": 200} +{"train_info/time_between_train_steps": 2.503857374191284, "step": 200} +{"info/global_step": 201, "train_info/time_within_train_step": 27.55684804916382, "step": 201} +{"train_info/time_between_train_steps": 0.00552821159362793, "step": 201} +{"info/global_step": 202, "train_info/time_within_train_step": 27.61298179626465, "step": 202} +{"train_info/time_between_train_steps": 0.005939483642578125, "step": 202} +{"info/global_step": 203, "train_info/time_within_train_step": 27.420921802520752, "step": 203} +{"train_info/time_between_train_steps": 0.005915164947509766, "step": 203} +{"info/global_step": 204, "train_info/time_within_train_step": 27.51962947845459, "step": 204} +{"train_info/time_between_train_steps": 0.0061855316162109375, "step": 204} +{"info/global_step": 205, "train_info/time_within_train_step": 27.578516006469727, "step": 205} +{"train_info/time_between_train_steps": 0.009366989135742188, "step": 205} +{"info/global_step": 206, "train_info/time_within_train_step": 27.401817798614502, "step": 206} +{"train_info/time_between_train_steps": 0.005549907684326172, "step": 206} +{"info/global_step": 207, "train_info/time_within_train_step": 27.385042428970337, "step": 207} +{"train_info/time_between_train_steps": 0.005377769470214844, "step": 207} +{"info/global_step": 208, "train_info/time_within_train_step": 27.458427667617798, "step": 208} +{"train_info/time_between_train_steps": 0.005507946014404297, "step": 208} +{"info/global_step": 209, "train_info/time_within_train_step": 27.527421951293945, "step": 209} +{"train_info/time_between_train_steps": 0.010378122329711914, 
"step": 209} +{"info/global_step": 210, "train_info/time_within_train_step": 27.618298053741455, "step": 210} +{"train_info/time_between_train_steps": 0.005384683609008789, "step": 210} +{"info/global_step": 211, "train_info/time_within_train_step": 27.48075008392334, "step": 211} +{"train_info/time_between_train_steps": 0.010745048522949219, "step": 211} +{"info/global_step": 212, "train_info/time_within_train_step": 27.572097539901733, "step": 212} +{"train_info/time_between_train_steps": 0.005389213562011719, "step": 212} +{"info/global_step": 213, "train_info/time_within_train_step": 27.467754125595093, "step": 213} +{"train_info/time_between_train_steps": 0.005552768707275391, "step": 213} +{"info/global_step": 214, "train_info/time_within_train_step": 27.38420009613037, "step": 214} +{"train_info/time_between_train_steps": 0.0053348541259765625, "step": 214} +{"info/global_step": 215, "train_info/time_within_train_step": 27.37370729446411, "step": 215} +{"train_info/time_between_train_steps": 0.0052487850189208984, "step": 215} +{"info/global_step": 216, "train_info/time_within_train_step": 27.594205856323242, "step": 216} +{"train_info/time_between_train_steps": 0.005227804183959961, "step": 216} +{"info/global_step": 217, "train_info/time_within_train_step": 27.52656841278076, "step": 217} +{"train_info/time_between_train_steps": 0.010387897491455078, "step": 217} +{"info/global_step": 218, "train_info/time_within_train_step": 27.741759061813354, "step": 218} +{"train_info/time_between_train_steps": 0.005597829818725586, "step": 218} +{"info/global_step": 219, "train_info/time_within_train_step": 27.42590022087097, "step": 219} +{"train_info/time_between_train_steps": 0.005492210388183594, "step": 219} +{"info/global_step": 220, "train_info/time_within_train_step": 27.52257990837097, "step": 220} +{"train_info/time_between_train_steps": 0.005412101745605469, "step": 220} +{"info/global_step": 221, "train_info/time_within_train_step": 27.52742075920105, "step": 221} +{"train_info/time_between_train_steps": 0.010595321655273438, "step": 221} +{"info/global_step": 222, "train_info/time_within_train_step": 27.41420006752014, "step": 222} +{"train_info/time_between_train_steps": 0.005486249923706055, "step": 222} +{"info/global_step": 223, "train_info/time_within_train_step": 27.495466470718384, "step": 223} +{"train_info/time_between_train_steps": 0.0056803226470947266, "step": 223} +{"info/global_step": 224, "train_info/time_within_train_step": 27.445333003997803, "step": 224} +{"train_info/time_between_train_steps": 0.010580301284790039, "step": 224} +{"train_info/time_between_train_steps": 14.4803466796875, "step": 224} +{"info/global_step": 225, "train_info/time_within_train_step": 27.506507635116577, "step": 225} +{"train_info/time_between_train_steps": 0.005053520202636719, "step": 225} +{"info/global_step": 226, "train_info/time_within_train_step": 27.74851655960083, "step": 226} +{"train_info/time_between_train_steps": 0.01081085205078125, "step": 226} +{"info/global_step": 227, "train_info/time_within_train_step": 27.466876983642578, "step": 227} +{"train_info/time_between_train_steps": 0.006232023239135742, "step": 227} +{"info/global_step": 228, "train_info/time_within_train_step": 27.79673480987549, "step": 228} +{"train_info/time_between_train_steps": 0.0059740543365478516, "step": 228} +{"info/global_step": 229, "train_info/time_within_train_step": 27.42812418937683, "step": 229} +{"train_info/time_between_train_steps": 0.0059354305267333984, "step": 229} 
+{"info/global_step": 230, "train_info/time_within_train_step": 27.637670755386353, "step": 230} +{"train_info/time_between_train_steps": 0.006090879440307617, "step": 230} +{"info/global_step": 231, "train_info/time_within_train_step": 27.493889331817627, "step": 231} +{"train_info/time_between_train_steps": 0.005914211273193359, "step": 231} +{"info/global_step": 232, "train_info/time_within_train_step": 27.5344398021698, "step": 232} +{"train_info/time_between_train_steps": 0.0055997371673583984, "step": 232} +{"info/global_step": 233, "train_info/time_within_train_step": 27.549020290374756, "step": 233} +{"train_info/time_between_train_steps": 0.005379915237426758, "step": 233} +{"info/global_step": 234, "train_info/time_within_train_step": 27.424307346343994, "step": 234} +{"train_info/time_between_train_steps": 0.005606889724731445, "step": 234} +{"info/global_step": 235, "train_info/time_within_train_step": 27.368574857711792, "step": 235} +{"train_info/time_between_train_steps": 0.005425930023193359, "step": 235} +{"info/global_step": 236, "train_info/time_within_train_step": 27.37300682067871, "step": 236} +{"train_info/time_between_train_steps": 0.0054361820220947266, "step": 236} +{"info/global_step": 237, "train_info/time_within_train_step": 27.394675970077515, "step": 237} +{"train_info/time_between_train_steps": 0.010686635971069336, "step": 237} +{"info/global_step": 238, "train_info/time_within_train_step": 27.379732131958008, "step": 238} +{"train_info/time_between_train_steps": 0.005172252655029297, "step": 238} +{"info/global_step": 239, "train_info/time_within_train_step": 27.389070987701416, "step": 239} +{"train_info/time_between_train_steps": 0.005417346954345703, "step": 239} +{"info/global_step": 240, "train_info/time_within_train_step": 27.43759799003601, "step": 240} +{"train_info/time_between_train_steps": 0.005601167678833008, "step": 240} +{"info/global_step": 241, "train_info/time_within_train_step": 27.37083911895752, "step": 241} +{"train_info/time_between_train_steps": 0.005396127700805664, "step": 241} +{"info/global_step": 242, "train_info/time_within_train_step": 27.387388706207275, "step": 242} +{"train_info/time_between_train_steps": 0.005314350128173828, "step": 242} +{"info/global_step": 243, "train_info/time_within_train_step": 27.427005767822266, "step": 243} +{"train_info/time_between_train_steps": 0.005330085754394531, "step": 243} +{"info/global_step": 244, "train_info/time_within_train_step": 27.442853212356567, "step": 244} +{"train_info/time_between_train_steps": 0.005422830581665039, "step": 244} +{"info/global_step": 245, "train_info/time_within_train_step": 27.40001940727234, "step": 245} +{"train_info/time_between_train_steps": 0.00571751594543457, "step": 245} +{"info/global_step": 246, "train_info/time_within_train_step": 27.57637095451355, "step": 246} +{"train_info/time_between_train_steps": 0.006142616271972656, "step": 246} +{"info/global_step": 247, "train_info/time_within_train_step": 27.490089416503906, "step": 247} +{"train_info/time_between_train_steps": 0.005628347396850586, "step": 247} +{"info/global_step": 248, "train_info/time_within_train_step": 27.45512628555298, "step": 248} +{"train_info/time_between_train_steps": 0.005475044250488281, "step": 248} +{"info/global_step": 249, "train_info/time_within_train_step": 27.42845606803894, "step": 249} +{"train_info/time_between_train_steps": 0.0057373046875, "step": 249} +{"info/global_step": 250, "train_info/time_within_train_step": 27.50446915626526, "step": 250} 
+{"train_info": {"train_info/memory_allocated": 2038.609375, "train_info/memory_max_allocated": 21606.1669921875, "train_info/memory_reserved": 24044.0, "train_info/memory_max_reserved": 24044.0, "_timestamp": 1732950361, "_runtime": 6999}, "step": 250} +{"logs": {"train/loss": 5.1974, "train/learning_rate": 0.0005277777777777777, "train/epoch": 8.02, "_timestamp": 1732950361, "_runtime": 6999}, "step": 250} +{"train_info/time_between_train_steps": 0.007810831069946289, "step": 250} +{"info/global_step": 251, "train_info/time_within_train_step": 27.44231939315796, "step": 251} +{"train_info/time_between_train_steps": 0.005974531173706055, "step": 251} +{"info/global_step": 252, "train_info/time_within_train_step": 27.507672786712646, "step": 252} +{"train_info/time_between_train_steps": 0.006490945816040039, "step": 252} +{"train_info/time_between_train_steps": 14.45392632484436, "step": 252} +{"info/global_step": 253, "train_info/time_within_train_step": 27.368412733078003, "step": 253} +{"train_info/time_between_train_steps": 0.0057065486907958984, "step": 253} +{"info/global_step": 254, "train_info/time_within_train_step": 27.555601119995117, "step": 254} +{"train_info/time_between_train_steps": 0.005658149719238281, "step": 254} +{"info/global_step": 255, "train_info/time_within_train_step": 27.424203634262085, "step": 255} +{"train_info/time_between_train_steps": 0.005697488784790039, "step": 255} +{"info/global_step": 256, "train_info/time_within_train_step": 27.595689296722412, "step": 256} +{"train_info/time_between_train_steps": 0.01246023178100586, "step": 256} +{"info/global_step": 257, "train_info/time_within_train_step": 27.58968162536621, "step": 257} +{"train_info/time_between_train_steps": 0.0058498382568359375, "step": 257} +{"info/global_step": 258, "train_info/time_within_train_step": 27.560628175735474, "step": 258} +{"train_info/time_between_train_steps": 0.008349895477294922, "step": 258} +{"info/global_step": 259, "train_info/time_within_train_step": 27.36305809020996, "step": 259} +{"train_info/time_between_train_steps": 0.005640745162963867, "step": 259} +{"info/global_step": 260, "train_info/time_within_train_step": 27.50528359413147, "step": 260} +{"train_info/time_between_train_steps": 0.0053348541259765625, "step": 260} +{"info/global_step": 261, "train_info/time_within_train_step": 27.425442457199097, "step": 261} +{"train_info/time_between_train_steps": 0.005346059799194336, "step": 261} +{"info/global_step": 262, "train_info/time_within_train_step": 27.599815845489502, "step": 262} +{"train_info/time_between_train_steps": 0.010267257690429688, "step": 262} +{"info/global_step": 263, "train_info/time_within_train_step": 27.515455722808838, "step": 263} +{"train_info/time_between_train_steps": 0.00516963005065918, "step": 263} +{"info/global_step": 264, "train_info/time_within_train_step": 27.46644377708435, "step": 264} +{"train_info/time_between_train_steps": 0.005002498626708984, "step": 264} +{"info/global_step": 265, "train_info/time_within_train_step": 27.363860368728638, "step": 265} +{"train_info/time_between_train_steps": 0.005194425582885742, "step": 265} +{"info/global_step": 266, "train_info/time_within_train_step": 27.390620946884155, "step": 266} +{"train_info/time_between_train_steps": 0.005118846893310547, "step": 266} +{"info/global_step": 267, "train_info/time_within_train_step": 27.509012699127197, "step": 267} +{"train_info/time_between_train_steps": 0.005145072937011719, "step": 267} +{"info/global_step": 268, 
"train_info/time_within_train_step": 27.40645718574524, "step": 268} +{"train_info/time_between_train_steps": 0.010306119918823242, "step": 268} +{"info/global_step": 269, "train_info/time_within_train_step": 27.67616081237793, "step": 269} +{"train_info/time_between_train_steps": 0.010040283203125, "step": 269} +{"info/global_step": 270, "train_info/time_within_train_step": 27.355063915252686, "step": 270} +{"train_info/time_between_train_steps": 0.005150318145751953, "step": 270} +{"info/global_step": 271, "train_info/time_within_train_step": 27.36303400993347, "step": 271} +{"train_info/time_between_train_steps": 0.005132198333740234, "step": 271} +{"info/global_step": 272, "train_info/time_within_train_step": 27.38129162788391, "step": 272} +{"train_info/time_between_train_steps": 0.005097866058349609, "step": 272} +{"info/global_step": 273, "train_info/time_within_train_step": 27.520975351333618, "step": 273} +{"train_info/time_between_train_steps": 0.009757757186889648, "step": 273} +{"info/global_step": 274, "train_info/time_within_train_step": 27.461338996887207, "step": 274} +{"train_info/time_between_train_steps": 0.006609678268432617, "step": 274} +{"info/global_step": 275, "train_info/time_within_train_step": 27.598721742630005, "step": 275} +{"train_info/time_between_train_steps": 0.010407209396362305, "step": 275} +{"info/global_step": 276, "train_info/time_within_train_step": 27.547789335250854, "step": 276} +{"train_info/time_between_train_steps": 0.00592350959777832, "step": 276} +{"info/global_step": 277, "train_info/time_within_train_step": 27.414093017578125, "step": 277} +{"train_info/time_between_train_steps": 0.0052776336669921875, "step": 277} +{"info/global_step": 278, "train_info/time_within_train_step": 27.622326135635376, "step": 278} +{"train_info/time_between_train_steps": 0.00535273551940918, "step": 278} +{"info/global_step": 279, "train_info/time_within_train_step": 27.394931316375732, "step": 279} +{"train_info/time_between_train_steps": 0.005419015884399414, "step": 279} +{"info/global_step": 280, "train_info/time_within_train_step": 27.396883010864258, "step": 280} +{"train_info/time_between_train_steps": 0.005788564682006836, "step": 280} +{"train_info/time_between_train_steps": 14.479071855545044, "step": 280} +{"info/global_step": 281, "train_info/time_within_train_step": 27.389151096343994, "step": 281} +{"train_info/time_between_train_steps": 0.005819559097290039, "step": 281} +{"info/global_step": 282, "train_info/time_within_train_step": 27.568918704986572, "step": 282} +{"train_info/time_between_train_steps": 0.005359172821044922, "step": 282} +{"info/global_step": 283, "train_info/time_within_train_step": 27.388155460357666, "step": 283} +{"train_info/time_between_train_steps": 0.006328582763671875, "step": 283} +{"info/global_step": 284, "train_info/time_within_train_step": 27.73587942123413, "step": 284} +{"train_info/time_between_train_steps": 0.0062046051025390625, "step": 284} +{"info/global_step": 285, "train_info/time_within_train_step": 27.39226269721985, "step": 285} +{"train_info/time_between_train_steps": 0.005827426910400391, "step": 285} +{"info/global_step": 286, "train_info/time_within_train_step": 27.568317890167236, "step": 286} +{"train_info/time_between_train_steps": 0.00565028190612793, "step": 286} +{"info/global_step": 287, "train_info/time_within_train_step": 27.460468769073486, "step": 287} +{"train_info/time_between_train_steps": 0.012726306915283203, "step": 287} +{"info/global_step": 288, 
"train_info/time_within_train_step": 27.58630108833313, "step": 288} +{"train_info/time_between_train_steps": 0.0053310394287109375, "step": 288} +{"info/global_step": 289, "train_info/time_within_train_step": 27.39510726928711, "step": 289} +{"train_info/time_between_train_steps": 0.0053386688232421875, "step": 289} +{"info/global_step": 290, "train_info/time_within_train_step": 27.49015212059021, "step": 290} +{"train_info/time_between_train_steps": 0.0051915645599365234, "step": 290} +{"info/global_step": 291, "train_info/time_within_train_step": 27.536815643310547, "step": 291} +{"train_info/time_between_train_steps": 0.009742259979248047, "step": 291} +{"info/global_step": 292, "train_info/time_within_train_step": 27.59512972831726, "step": 292} +{"train_info/time_between_train_steps": 0.009740352630615234, "step": 292} +{"info/global_step": 293, "train_info/time_within_train_step": 27.67795205116272, "step": 293} +{"train_info/time_between_train_steps": 0.005343914031982422, "step": 293} +{"info/global_step": 294, "train_info/time_within_train_step": 27.417489767074585, "step": 294} +{"train_info/time_between_train_steps": 0.009613513946533203, "step": 294} +{"info/global_step": 295, "train_info/time_within_train_step": 27.54036831855774, "step": 295} +{"train_info/time_between_train_steps": 0.005245208740234375, "step": 295} +{"info/global_step": 296, "train_info/time_within_train_step": 27.374690771102905, "step": 296} +{"train_info/time_between_train_steps": 0.005311727523803711, "step": 296} +{"info/global_step": 297, "train_info/time_within_train_step": 27.350056171417236, "step": 297} +{"train_info/time_between_train_steps": 0.005135059356689453, "step": 297} +{"info/global_step": 298, "train_info/time_within_train_step": 27.3747136592865, "step": 298} +{"train_info/time_between_train_steps": 0.005205631256103516, "step": 298} +{"info/global_step": 299, "train_info/time_within_train_step": 27.358184099197388, "step": 299} +{"train_info/time_between_train_steps": 0.005132198333740234, "step": 299} +{"info/global_step": 300, "train_info/time_within_train_step": 27.357333421707153, "step": 300} +{"train_info": {"train_info/memory_allocated": 2038.609375, "train_info/memory_max_allocated": 21606.1669921875, "train_info/memory_reserved": 24044.0, "train_info/memory_max_reserved": 24044.0, "_timestamp": 1732951765, "_runtime": 8403}, "step": 300} +{"logs": {"train/loss": 5.1091, "train/learning_rate": 0.0005, "train/epoch": 10.02, "_timestamp": 1732951765, "_runtime": 8403}, "step": 300} +{"train_info/time_between_train_steps": 2.500194549560547, "step": 300} +{"info/global_step": 301, "train_info/time_within_train_step": 27.33908987045288, "step": 301} +{"train_info/time_between_train_steps": 0.005259990692138672, "step": 301} +{"info/global_step": 302, "train_info/time_within_train_step": 27.381315231323242, "step": 302} +{"train_info/time_between_train_steps": 0.010376691818237305, "step": 302} +{"info/global_step": 303, "train_info/time_within_train_step": 27.387537240982056, "step": 303} +{"train_info/time_between_train_steps": 0.005143404006958008, "step": 303} +{"info/global_step": 304, "train_info/time_within_train_step": 27.328981161117554, "step": 304} +{"train_info/time_between_train_steps": 0.005385398864746094, "step": 304} +{"info/global_step": 305, "train_info/time_within_train_step": 27.320592403411865, "step": 305} +{"train_info/time_between_train_steps": 0.00555109977722168, "step": 305} +{"info/global_step": 306, "train_info/time_within_train_step": 
27.34111523628235, "step": 306} +{"train_info/time_between_train_steps": 0.0056304931640625, "step": 306} +{"info/global_step": 307, "train_info/time_within_train_step": 27.37130308151245, "step": 307} +{"train_info/time_between_train_steps": 0.005691051483154297, "step": 307} +{"info/global_step": 308, "train_info/time_within_train_step": 27.402584314346313, "step": 308} +{"train_info/time_between_train_steps": 0.006002664566040039, "step": 308} +{"train_info/time_between_train_steps": 14.761247158050537, "step": 308} +{"info/global_step": 309, "train_info/time_within_train_step": 27.357048273086548, "step": 309} +{"train_info/time_between_train_steps": 0.012788534164428711, "step": 309} +{"info/global_step": 310, "train_info/time_within_train_step": 27.551549673080444, "step": 310} +{"train_info/time_between_train_steps": 0.012813806533813477, "step": 310} +{"info/global_step": 311, "train_info/time_within_train_step": 27.386369466781616, "step": 311} +{"train_info/time_between_train_steps": 0.005218505859375, "step": 311} +{"info/global_step": 312, "train_info/time_within_train_step": 27.502081871032715, "step": 312} +{"train_info/time_between_train_steps": 0.005621194839477539, "step": 312} +{"info/global_step": 313, "train_info/time_within_train_step": 27.522814512252808, "step": 313} +{"train_info/time_between_train_steps": 0.01235651969909668, "step": 313} +{"info/global_step": 314, "train_info/time_within_train_step": 27.529982566833496, "step": 314} +{"train_info/time_between_train_steps": 0.011595487594604492, "step": 314} +{"info/global_step": 315, "train_info/time_within_train_step": 27.560405254364014, "step": 315} +{"train_info/time_between_train_steps": 0.005589008331298828, "step": 315} +{"info/global_step": 316, "train_info/time_within_train_step": 27.48099637031555, "step": 316} +{"train_info/time_between_train_steps": 0.005336761474609375, "step": 316} +{"info/global_step": 317, "train_info/time_within_train_step": 27.37707781791687, "step": 317} +{"train_info/time_between_train_steps": 0.00495600700378418, "step": 317} +{"info/global_step": 318, "train_info/time_within_train_step": 27.420858144760132, "step": 318} +{"train_info/time_between_train_steps": 0.005083322525024414, "step": 318} +{"info/global_step": 319, "train_info/time_within_train_step": 27.40484094619751, "step": 319} +{"train_info/time_between_train_steps": 0.009394407272338867, "step": 319} +{"info/global_step": 320, "train_info/time_within_train_step": 27.33451199531555, "step": 320} +{"train_info/time_between_train_steps": 0.00500178337097168, "step": 320} +{"info/global_step": 321, "train_info/time_within_train_step": 27.351197719573975, "step": 321} +{"train_info/time_between_train_steps": 0.005165576934814453, "step": 321} +{"info/global_step": 322, "train_info/time_within_train_step": 27.435906887054443, "step": 322} +{"train_info/time_between_train_steps": 0.011641740798950195, "step": 322} +{"info/global_step": 323, "train_info/time_within_train_step": 27.462158918380737, "step": 323} +{"train_info/time_between_train_steps": 0.005101680755615234, "step": 323} +{"info/global_step": 324, "train_info/time_within_train_step": 27.51569890975952, "step": 324} +{"train_info/time_between_train_steps": 0.005183219909667969, "step": 324} +{"info/global_step": 325, "train_info/time_within_train_step": 27.56147813796997, "step": 325} +{"train_info/time_between_train_steps": 0.009693145751953125, "step": 325} +{"info/global_step": 326, "train_info/time_within_train_step": 27.3546941280365, "step": 326} 
+{"train_info/time_between_train_steps": 0.004998922348022461, "step": 326} +{"info/global_step": 327, "train_info/time_within_train_step": 27.340335607528687, "step": 327} +{"train_info/time_between_train_steps": 0.005003213882446289, "step": 327} +{"info/global_step": 328, "train_info/time_within_train_step": 27.448230028152466, "step": 328} +{"train_info/time_between_train_steps": 0.014654874801635742, "step": 328} +{"info/global_step": 329, "train_info/time_within_train_step": 27.361562252044678, "step": 329} +{"train_info/time_between_train_steps": 0.014855146408081055, "step": 329} +{"info/global_step": 330, "train_info/time_within_train_step": 27.414008617401123, "step": 330} +{"train_info/time_between_train_steps": 0.005218505859375, "step": 330} +{"info/global_step": 331, "train_info/time_within_train_step": 27.38658356666565, "step": 331} +{"train_info/time_between_train_steps": 0.0049860477447509766, "step": 331} +{"info/global_step": 332, "train_info/time_within_train_step": 27.36653447151184, "step": 332} +{"train_info/time_between_train_steps": 0.005344390869140625, "step": 332} +{"info/global_step": 333, "train_info/time_within_train_step": 27.36101222038269, "step": 333} +{"train_info/time_between_train_steps": 0.005236625671386719, "step": 333} +{"info/global_step": 334, "train_info/time_within_train_step": 27.381267070770264, "step": 334} +{"train_info/time_between_train_steps": 0.010027885437011719, "step": 334} +{"info/global_step": 335, "train_info/time_within_train_step": 27.370970249176025, "step": 335} +{"train_info/time_between_train_steps": 0.005296468734741211, "step": 335} +{"info/global_step": 336, "train_info/time_within_train_step": 27.571604251861572, "step": 336} +{"train_info/time_between_train_steps": 0.006666660308837891, "step": 336} +{"train_info/time_between_train_steps": 14.20847463607788, "step": 336} +{"info/global_step": 337, "train_info/time_within_train_step": 27.375461101531982, "step": 337} +{"train_info/time_between_train_steps": 0.00570368766784668, "step": 337} +{"info/global_step": 338, "train_info/time_within_train_step": 27.55630350112915, "step": 338} +{"train_info/time_between_train_steps": 0.005345344543457031, "step": 338} +{"info/global_step": 339, "train_info/time_within_train_step": 27.465150833129883, "step": 339} +{"train_info/time_between_train_steps": 0.0055921077728271484, "step": 339} +{"info/global_step": 340, "train_info/time_within_train_step": 27.56827211380005, "step": 340} +{"train_info/time_between_train_steps": 0.007735490798950195, "step": 340} +{"info/global_step": 341, "train_info/time_within_train_step": 27.46852731704712, "step": 341} +{"train_info/time_between_train_steps": 0.005805015563964844, "step": 341} +{"info/global_step": 342, "train_info/time_within_train_step": 27.714733600616455, "step": 342} +{"train_info/time_between_train_steps": 0.005650997161865234, "step": 342} +{"info/global_step": 343, "train_info/time_within_train_step": 27.503203630447388, "step": 343} +{"train_info/time_between_train_steps": 0.005675077438354492, "step": 343} +{"info/global_step": 344, "train_info/time_within_train_step": 27.52447247505188, "step": 344} +{"train_info/time_between_train_steps": 0.005537271499633789, "step": 344} +{"info/global_step": 345, "train_info/time_within_train_step": 27.427902221679688, "step": 345} +{"train_info/time_between_train_steps": 0.005116701126098633, "step": 345} +{"info/global_step": 346, "train_info/time_within_train_step": 27.447869062423706, "step": 346} 
+{"train_info/time_between_train_steps": 0.009890556335449219, "step": 346} +{"info/global_step": 347, "train_info/time_within_train_step": 27.56831121444702, "step": 347} +{"train_info/time_between_train_steps": 0.005338191986083984, "step": 347} +{"info/global_step": 348, "train_info/time_within_train_step": 27.377293348312378, "step": 348} +{"train_info/time_between_train_steps": 0.005384922027587891, "step": 348} +{"info/global_step": 349, "train_info/time_within_train_step": 27.506483554840088, "step": 349} +{"train_info/time_between_train_steps": 0.006699085235595703, "step": 349} +{"info/global_step": 350, "train_info/time_within_train_step": 27.4615797996521, "step": 350} +{"train_info": {"train_info/memory_allocated": 2038.609375, "train_info/memory_max_allocated": 21606.1669921875, "train_info/memory_reserved": 24044.0, "train_info/memory_max_reserved": 24044.0, "_timestamp": 1732953168, "_runtime": 9806}, "step": 350} +{"logs": {"train/loss": 4.9724, "train/learning_rate": 0.00047222222222222224, "train/epoch": 12.01, "_timestamp": 1732953168, "_runtime": 9806}, "step": 350} +{"train_info/time_between_train_steps": 0.013280630111694336, "step": 350} +{"info/global_step": 351, "train_info/time_within_train_step": 27.38720941543579, "step": 351} +{"train_info/time_between_train_steps": 0.00563502311706543, "step": 351} +{"info/global_step": 352, "train_info/time_within_train_step": 27.410947799682617, "step": 352} +{"train_info/time_between_train_steps": 0.00546717643737793, "step": 352} +{"info/global_step": 353, "train_info/time_within_train_step": 27.543145895004272, "step": 353} +{"train_info/time_between_train_steps": 0.005445003509521484, "step": 353} +{"info/global_step": 354, "train_info/time_within_train_step": 27.365270614624023, "step": 354} +{"train_info/time_between_train_steps": 0.012284040451049805, "step": 354} +{"info/global_step": 355, "train_info/time_within_train_step": 27.754403352737427, "step": 355} +{"train_info/time_between_train_steps": 0.006868600845336914, "step": 355} +{"info/global_step": 356, "train_info/time_within_train_step": 27.421066761016846, "step": 356} +{"train_info/time_between_train_steps": 0.010128259658813477, "step": 356} +{"info/global_step": 357, "train_info/time_within_train_step": 27.400007247924805, "step": 357} +{"train_info/time_between_train_steps": 0.010904788970947266, "step": 357} +{"info/global_step": 358, "train_info/time_within_train_step": 27.36131191253662, "step": 358} +{"train_info/time_between_train_steps": 0.005544185638427734, "step": 358} +{"info/global_step": 359, "train_info/time_within_train_step": 27.390732049942017, "step": 359} +{"train_info/time_between_train_steps": 0.005499839782714844, "step": 359} +{"info/global_step": 360, "train_info/time_within_train_step": 27.397077560424805, "step": 360} +{"train_info/time_between_train_steps": 0.005644321441650391, "step": 360} +{"info/global_step": 361, "train_info/time_within_train_step": 27.411872625350952, "step": 361} +{"train_info/time_between_train_steps": 0.005462646484375, "step": 361} +{"info/global_step": 362, "train_info/time_within_train_step": 27.65293025970459, "step": 362} +{"train_info/time_between_train_steps": 0.00769805908203125, "step": 362} +{"info/global_step": 363, "train_info/time_within_train_step": 27.440024852752686, "step": 363} +{"train_info/time_between_train_steps": 0.005753993988037109, "step": 363} +{"info/global_step": 364, "train_info/time_within_train_step": 27.420095205307007, "step": 364} 
+{"train_info/time_between_train_steps": 0.00629878044128418, "step": 364} +{"train_info/time_between_train_steps": 14.466418504714966, "step": 364} +{"info/global_step": 365, "train_info/time_within_train_step": 27.726123809814453, "step": 365} +{"train_info/time_between_train_steps": 0.015191316604614258, "step": 365} +{"info/global_step": 366, "train_info/time_within_train_step": 27.83284020423889, "step": 366} +{"train_info/time_between_train_steps": 0.00570225715637207, "step": 366} +{"info/global_step": 367, "train_info/time_within_train_step": 27.515506505966187, "step": 367} +{"train_info/time_between_train_steps": 0.005620002746582031, "step": 367} +{"info/global_step": 368, "train_info/time_within_train_step": 27.56434202194214, "step": 368} +{"train_info/time_between_train_steps": 0.005345582962036133, "step": 368} +{"info/global_step": 369, "train_info/time_within_train_step": 27.362585067749023, "step": 369} +{"train_info/time_between_train_steps": 0.005602359771728516, "step": 369} +{"info/global_step": 370, "train_info/time_within_train_step": 27.659828186035156, "step": 370} +{"train_info/time_between_train_steps": 0.00547027587890625, "step": 370} +{"info/global_step": 371, "train_info/time_within_train_step": 27.417495012283325, "step": 371} +{"train_info/time_between_train_steps": 0.011269807815551758, "step": 371} +{"info/global_step": 372, "train_info/time_within_train_step": 27.498193502426147, "step": 372} +{"train_info/time_between_train_steps": 0.00541377067565918, "step": 372} +{"info/global_step": 373, "train_info/time_within_train_step": 27.383286237716675, "step": 373} +{"train_info/time_between_train_steps": 0.005182743072509766, "step": 373} +{"info/global_step": 374, "train_info/time_within_train_step": 27.33930778503418, "step": 374} +{"train_info/time_between_train_steps": 0.009900808334350586, "step": 374} +{"info/global_step": 375, "train_info/time_within_train_step": 27.33915138244629, "step": 375} +{"train_info/time_between_train_steps": 0.005128383636474609, "step": 375} +{"info/global_step": 376, "train_info/time_within_train_step": 27.51814556121826, "step": 376} +{"train_info/time_between_train_steps": 0.005173444747924805, "step": 376} +{"info/global_step": 377, "train_info/time_within_train_step": 27.35002899169922, "step": 377} +{"train_info/time_between_train_steps": 0.005284547805786133, "step": 377} +{"info/global_step": 378, "train_info/time_within_train_step": 27.34164834022522, "step": 378} +{"train_info/time_between_train_steps": 0.005009889602661133, "step": 378} +{"info/global_step": 379, "train_info/time_within_train_step": 27.374985218048096, "step": 379} +{"train_info/time_between_train_steps": 0.005152225494384766, "step": 379} +{"info/global_step": 380, "train_info/time_within_train_step": 27.350565671920776, "step": 380} +{"train_info/time_between_train_steps": 0.005372047424316406, "step": 380} +{"info/global_step": 381, "train_info/time_within_train_step": 27.36176896095276, "step": 381} +{"train_info/time_between_train_steps": 0.005005359649658203, "step": 381} +{"info/global_step": 382, "train_info/time_within_train_step": 27.370381355285645, "step": 382} +{"train_info/time_between_train_steps": 0.009586334228515625, "step": 382} +{"info/global_step": 383, "train_info/time_within_train_step": 27.346685886383057, "step": 383} +{"train_info/time_between_train_steps": 0.01128244400024414, "step": 383} +{"info/global_step": 384, "train_info/time_within_train_step": 27.334998846054077, "step": 384} 
+{"train_info/time_between_train_steps": 0.005162477493286133, "step": 384} +{"info/global_step": 385, "train_info/time_within_train_step": 27.36258578300476, "step": 385} +{"train_info/time_between_train_steps": 0.00527501106262207, "step": 385} +{"info/global_step": 386, "train_info/time_within_train_step": 27.43100142478943, "step": 386} +{"train_info/time_between_train_steps": 0.005337238311767578, "step": 386} +{"info/global_step": 387, "train_info/time_within_train_step": 27.485969066619873, "step": 387} +{"train_info/time_between_train_steps": 0.0055081844329833984, "step": 387} +{"info/global_step": 388, "train_info/time_within_train_step": 27.40821385383606, "step": 388} +{"train_info/time_between_train_steps": 0.005324840545654297, "step": 388} +{"info/global_step": 389, "train_info/time_within_train_step": 27.35845136642456, "step": 389} +{"train_info/time_between_train_steps": 0.005561351776123047, "step": 389} +{"info/global_step": 390, "train_info/time_within_train_step": 27.503490686416626, "step": 390} +{"train_info/time_between_train_steps": 0.010833740234375, "step": 390} +{"info/global_step": 391, "train_info/time_within_train_step": 27.629401683807373, "step": 391} +{"train_info/time_between_train_steps": 0.005612611770629883, "step": 391} +{"info/global_step": 392, "train_info/time_within_train_step": 27.86233687400818, "step": 392} +{"train_info/time_between_train_steps": 0.015804052352905273, "step": 392} +{"train_info/time_between_train_steps": 15.343852519989014, "step": 392} +{"info/global_step": 393, "train_info/time_within_train_step": 27.80068612098694, "step": 393} +{"train_info/time_between_train_steps": 0.005535125732421875, "step": 393} +{"info/global_step": 394, "train_info/time_within_train_step": 27.820201873779297, "step": 394} +{"train_info/time_between_train_steps": 0.005728483200073242, "step": 394} +{"info/global_step": 395, "train_info/time_within_train_step": 27.40245294570923, "step": 395} +{"train_info/time_between_train_steps": 0.00562596321105957, "step": 395} +{"info/global_step": 396, "train_info/time_within_train_step": 27.598917245864868, "step": 396} +{"train_info/time_between_train_steps": 0.005342960357666016, "step": 396} +{"info/global_step": 397, "train_info/time_within_train_step": 27.38227128982544, "step": 397} +{"train_info/time_between_train_steps": 0.011159658432006836, "step": 397} +{"info/global_step": 398, "train_info/time_within_train_step": 27.57516837120056, "step": 398} +{"train_info/time_between_train_steps": 0.005506038665771484, "step": 398} +{"info/global_step": 399, "train_info/time_within_train_step": 27.619200229644775, "step": 399} +{"train_info/time_between_train_steps": 0.0055043697357177734, "step": 399} +{"info/global_step": 400, "train_info/time_within_train_step": 27.685478925704956, "step": 400} +{"train_info": {"train_info/memory_allocated": 2038.609375, "train_info/memory_max_allocated": 21606.1669921875, "train_info/memory_reserved": 24044.0, "train_info/memory_max_reserved": 24044.0, "_timestamp": 1732954573, "_runtime": 11211}, "step": 400} +{"logs": {"train/loss": 4.8054, "train/learning_rate": 0.00044444444444444436, "train/epoch": 14.01, "_timestamp": 1732954573, "_runtime": 11211}, "step": 400} +{"train_info/time_between_train_steps": 2.45346736907959, "step": 400} +{"info/global_step": 401, "train_info/time_within_train_step": 27.520081520080566, "step": 401} +{"train_info/time_between_train_steps": 0.00531768798828125, "step": 401} +{"info/global_step": 402, "train_info/time_within_train_step": 
27.399240970611572, "step": 402} +{"train_info/time_between_train_steps": 0.005746126174926758, "step": 402} +{"info/global_step": 403, "train_info/time_within_train_step": 27.441065549850464, "step": 403} +{"train_info/time_between_train_steps": 0.0058634281158447266, "step": 403} +{"info/global_step": 404, "train_info/time_within_train_step": 27.525526762008667, "step": 404} +{"train_info/time_between_train_steps": 0.007090330123901367, "step": 404} +{"info/global_step": 405, "train_info/time_within_train_step": 27.46709179878235, "step": 405} +{"train_info/time_between_train_steps": 0.01276254653930664, "step": 405} +{"info/global_step": 406, "train_info/time_within_train_step": 27.648627281188965, "step": 406} +{"train_info/time_between_train_steps": 0.007365703582763672, "step": 406} +{"info/global_step": 407, "train_info/time_within_train_step": 27.49531364440918, "step": 407} +{"train_info/time_between_train_steps": 0.011977434158325195, "step": 407} +{"info/global_step": 408, "train_info/time_within_train_step": 27.508465051651, "step": 408} +{"train_info/time_between_train_steps": 0.006173372268676758, "step": 408} +{"info/global_step": 409, "train_info/time_within_train_step": 27.508992910385132, "step": 409} +{"train_info/time_between_train_steps": 0.007602691650390625, "step": 409} +{"info/global_step": 410, "train_info/time_within_train_step": 27.52126121520996, "step": 410} +{"train_info/time_between_train_steps": 0.011289834976196289, "step": 410} +{"info/global_step": 411, "train_info/time_within_train_step": 27.4307758808136, "step": 411} +{"train_info/time_between_train_steps": 0.0058786869049072266, "step": 411} +{"info/global_step": 412, "train_info/time_within_train_step": 27.469977617263794, "step": 412} +{"train_info/time_between_train_steps": 0.0070993900299072266, "step": 412} +{"info/global_step": 413, "train_info/time_within_train_step": 27.459405183792114, "step": 413} +{"train_info/time_between_train_steps": 0.011430740356445312, "step": 413} +{"info/global_step": 414, "train_info/time_within_train_step": 27.497270345687866, "step": 414} +{"train_info/time_between_train_steps": 0.008470296859741211, "step": 414} +{"info/global_step": 415, "train_info/time_within_train_step": 27.429710865020752, "step": 415} +{"train_info/time_between_train_steps": 0.006070137023925781, "step": 415} +{"info/global_step": 416, "train_info/time_within_train_step": 27.580437421798706, "step": 416} +{"train_info/time_between_train_steps": 0.006534576416015625, "step": 416} +{"info/global_step": 417, "train_info/time_within_train_step": 27.47082233428955, "step": 417} +{"train_info/time_between_train_steps": 0.006361484527587891, "step": 417} +{"info/global_step": 418, "train_info/time_within_train_step": 27.43656897544861, "step": 418} +{"train_info/time_between_train_steps": 0.008386373519897461, "step": 418} +{"info/global_step": 419, "train_info/time_within_train_step": 27.442814111709595, "step": 419} +{"train_info/time_between_train_steps": 0.0061185359954833984, "step": 419} +{"info/global_step": 420, "train_info/time_within_train_step": 27.443182229995728, "step": 420} +{"train_info/time_between_train_steps": 0.00656437873840332, "step": 420} +{"train_info/time_between_train_steps": 14.383973121643066, "step": 420} +{"info/global_step": 421, "train_info/time_within_train_step": 27.536844491958618, "step": 421} +{"train_info/time_between_train_steps": 0.013072013854980469, "step": 421} +{"info/global_step": 422, "train_info/time_within_train_step": 27.676921129226685, "step": 
422} +{"train_info/time_between_train_steps": 0.005570888519287109, "step": 422} +{"info/global_step": 423, "train_info/time_within_train_step": 27.466707468032837, "step": 423} +{"train_info/time_between_train_steps": 0.005590915679931641, "step": 423} +{"info/global_step": 424, "train_info/time_within_train_step": 27.718488216400146, "step": 424} +{"train_info/time_between_train_steps": 0.005527973175048828, "step": 424} +{"info/global_step": 425, "train_info/time_within_train_step": 27.449984788894653, "step": 425} +{"train_info/time_between_train_steps": 0.00983881950378418, "step": 425} +{"info/global_step": 426, "train_info/time_within_train_step": 27.63674807548523, "step": 426} +{"train_info/time_between_train_steps": 0.0060100555419921875, "step": 426} +{"info/global_step": 427, "train_info/time_within_train_step": 27.45196795463562, "step": 427} +{"train_info/time_between_train_steps": 0.0057756900787353516, "step": 427} +{"info/global_step": 428, "train_info/time_within_train_step": 27.62912154197693, "step": 428} +{"train_info/time_between_train_steps": 0.005343437194824219, "step": 428} +{"info/global_step": 429, "train_info/time_within_train_step": 27.56450128555298, "step": 429} +{"train_info/time_between_train_steps": 0.005183696746826172, "step": 429} +{"info/global_step": 430, "train_info/time_within_train_step": 27.413596153259277, "step": 430} +{"train_info/time_between_train_steps": 0.005297422409057617, "step": 430} +{"info/global_step": 431, "train_info/time_within_train_step": 27.40464210510254, "step": 431} +{"train_info/time_between_train_steps": 0.005304098129272461, "step": 431} +{"info/global_step": 432, "train_info/time_within_train_step": 27.543198823928833, "step": 432} +{"train_info/time_between_train_steps": 0.005166530609130859, "step": 432} +{"info/global_step": 433, "train_info/time_within_train_step": 27.373689651489258, "step": 433} +{"train_info/time_between_train_steps": 0.006650209426879883, "step": 433} +{"info/global_step": 434, "train_info/time_within_train_step": 27.3818256855011, "step": 434} +{"train_info/time_between_train_steps": 0.005235910415649414, "step": 434} +{"info/global_step": 435, "train_info/time_within_train_step": 27.401049375534058, "step": 435} +{"train_info/time_between_train_steps": 0.014569520950317383, "step": 435} +{"info/global_step": 436, "train_info/time_within_train_step": 27.406546115875244, "step": 436} +{"train_info/time_between_train_steps": 0.005319356918334961, "step": 436} +{"info/global_step": 437, "train_info/time_within_train_step": 27.378504514694214, "step": 437} +{"train_info/time_between_train_steps": 0.005420684814453125, "step": 437} +{"info/global_step": 438, "train_info/time_within_train_step": 27.418848514556885, "step": 438} +{"train_info/time_between_train_steps": 0.00533294677734375, "step": 438} +{"info/global_step": 439, "train_info/time_within_train_step": 27.409156560897827, "step": 439} +{"train_info/time_between_train_steps": 0.005242109298706055, "step": 439} +{"info/global_step": 440, "train_info/time_within_train_step": 27.440677881240845, "step": 440} +{"train_info/time_between_train_steps": 0.0053293704986572266, "step": 440} +{"info/global_step": 441, "train_info/time_within_train_step": 27.410297393798828, "step": 441} +{"train_info/time_between_train_steps": 0.0053598880767822266, "step": 441} +{"info/global_step": 442, "train_info/time_within_train_step": 27.445372343063354, "step": 442} +{"train_info/time_between_train_steps": 0.005576133728027344, "step": 442} 
+{"info/global_step": 443, "train_info/time_within_train_step": 27.40666627883911, "step": 443} +{"train_info/time_between_train_steps": 0.005277395248413086, "step": 443} +{"info/global_step": 444, "train_info/time_within_train_step": 27.547869443893433, "step": 444} +{"train_info/time_between_train_steps": 0.005454540252685547, "step": 444} +{"info/global_step": 445, "train_info/time_within_train_step": 27.565590381622314, "step": 445} +{"train_info/time_between_train_steps": 0.0065958499908447266, "step": 445} +{"info/global_step": 446, "train_info/time_within_train_step": 27.392098665237427, "step": 446} +{"train_info/time_between_train_steps": 0.006318569183349609, "step": 446} +{"info/global_step": 447, "train_info/time_within_train_step": 27.660049438476562, "step": 447} +{"train_info/time_between_train_steps": 0.0055353641510009766, "step": 447} +{"info/global_step": 448, "train_info/time_within_train_step": 27.430819988250732, "step": 448} +{"train_info/time_between_train_steps": 0.007032632827758789, "step": 448} +{"train_info/time_between_train_steps": 14.561387300491333, "step": 448} +{"info/global_step": 449, "train_info/time_within_train_step": 27.551758289337158, "step": 449} +{"train_info/time_between_train_steps": 0.00577545166015625, "step": 449} +{"info/global_step": 450, "train_info/time_within_train_step": 27.560842514038086, "step": 450} +{"train_info": {"train_info/memory_allocated": 2038.609375, "train_info/memory_max_allocated": 21606.1669921875, "train_info/memory_reserved": 24044.0, "train_info/memory_max_reserved": 24044.0, "_timestamp": 1732955979, "_runtime": 12617}, "step": 450} +{"logs": {"train/loss": 4.5929, "train/learning_rate": 0.00041666666666666664, "train/epoch": 16.0, "_timestamp": 1732955979, "_runtime": 12617}, "step": 450} +{"train_info/time_between_train_steps": 0.012284517288208008, "step": 450} +{"info/global_step": 451, "train_info/time_within_train_step": 27.467010736465454, "step": 451} +{"train_info/time_between_train_steps": 0.011240959167480469, "step": 451} +{"info/global_step": 452, "train_info/time_within_train_step": 27.57811975479126, "step": 452} +{"train_info/time_between_train_steps": 0.0053255558013916016, "step": 452} +{"info/global_step": 453, "train_info/time_within_train_step": 27.405463457107544, "step": 453} +{"train_info/time_between_train_steps": 0.010965108871459961, "step": 453} +{"info/global_step": 454, "train_info/time_within_train_step": 27.60713267326355, "step": 454} +{"train_info/time_between_train_steps": 0.005407571792602539, "step": 454} +{"info/global_step": 455, "train_info/time_within_train_step": 27.435887098312378, "step": 455} +{"train_info/time_between_train_steps": 0.01471400260925293, "step": 455} +{"info/global_step": 456, "train_info/time_within_train_step": 27.47846221923828, "step": 456} +{"train_info/time_between_train_steps": 0.005188703536987305, "step": 456} +{"info/global_step": 457, "train_info/time_within_train_step": 27.398728609085083, "step": 457} +{"train_info/time_between_train_steps": 0.00529170036315918, "step": 457} +{"info/global_step": 458, "train_info/time_within_train_step": 27.417346239089966, "step": 458} +{"train_info/time_between_train_steps": 0.005007266998291016, "step": 458} +{"info/global_step": 459, "train_info/time_within_train_step": 27.411901712417603, "step": 459} +{"train_info/time_between_train_steps": 0.005272865295410156, "step": 459} +{"info/global_step": 460, "train_info/time_within_train_step": 27.35456132888794, "step": 460} 
+{"train_info/time_between_train_steps": 0.007830619812011719, "step": 460} +{"info/global_step": 461, "train_info/time_within_train_step": 27.455092906951904, "step": 461} +{"train_info/time_between_train_steps": 0.005223274230957031, "step": 461} +{"info/global_step": 462, "train_info/time_within_train_step": 27.453541040420532, "step": 462} +{"train_info/time_between_train_steps": 0.005074262619018555, "step": 462} +{"info/global_step": 463, "train_info/time_within_train_step": 27.369553565979004, "step": 463} +{"train_info/time_between_train_steps": 0.010168075561523438, "step": 463} +{"info/global_step": 464, "train_info/time_within_train_step": 27.34481978416443, "step": 464} +{"train_info/time_between_train_steps": 0.004970550537109375, "step": 464} +{"info/global_step": 465, "train_info/time_within_train_step": 27.415329456329346, "step": 465} +{"train_info/time_between_train_steps": 0.00970602035522461, "step": 465} +{"info/global_step": 466, "train_info/time_within_train_step": 27.422064065933228, "step": 466} +{"train_info/time_between_train_steps": 0.009597301483154297, "step": 466} +{"info/global_step": 467, "train_info/time_within_train_step": 27.35051655769348, "step": 467} +{"train_info/time_between_train_steps": 0.004970550537109375, "step": 467} +{"info/global_step": 468, "train_info/time_within_train_step": 27.47574758529663, "step": 468} +{"train_info/time_between_train_steps": 0.005274534225463867, "step": 468} +{"info/global_step": 469, "train_info/time_within_train_step": 27.36225652694702, "step": 469} +{"train_info/time_between_train_steps": 0.005081653594970703, "step": 469} +{"info/global_step": 470, "train_info/time_within_train_step": 27.452273845672607, "step": 470} +{"train_info/time_between_train_steps": 0.00520634651184082, "step": 470} +{"info/global_step": 471, "train_info/time_within_train_step": 27.364166259765625, "step": 471} +{"train_info/time_between_train_steps": 0.005267143249511719, "step": 471} +{"info/global_step": 472, "train_info/time_within_train_step": 27.40774941444397, "step": 472} +{"train_info/time_between_train_steps": 0.009908676147460938, "step": 472} +{"info/global_step": 473, "train_info/time_within_train_step": 27.395052671432495, "step": 473} +{"train_info/time_between_train_steps": 0.005341291427612305, "step": 473} +{"info/global_step": 474, "train_info/time_within_train_step": 27.37962818145752, "step": 474} +{"train_info/time_between_train_steps": 0.005530834197998047, "step": 474} +{"info/global_step": 475, "train_info/time_within_train_step": 27.387266874313354, "step": 475} +{"train_info/time_between_train_steps": 0.00532078742980957, "step": 475} +{"info/global_step": 476, "train_info/time_within_train_step": 27.392597675323486, "step": 476} +{"train_info/time_between_train_steps": 0.005724430084228516, "step": 476} +{"train_info/time_between_train_steps": 14.731572151184082, "step": 476} +{"info/global_step": 477, "train_info/time_within_train_step": 27.3564875125885, "step": 477} +{"train_info/time_between_train_steps": 0.010548114776611328, "step": 477} +{"info/global_step": 478, "train_info/time_within_train_step": 27.849870681762695, "step": 478} +{"train_info/time_between_train_steps": 0.005453348159790039, "step": 478} +{"info/global_step": 479, "train_info/time_within_train_step": 27.400803089141846, "step": 479} +{"train_info/time_between_train_steps": 0.006481647491455078, "step": 479} +{"info/global_step": 480, "train_info/time_within_train_step": 27.63504719734192, "step": 480} 
+{"train_info/time_between_train_steps": 0.005303621292114258, "step": 480} +{"info/global_step": 481, "train_info/time_within_train_step": 27.43759059906006, "step": 481} +{"train_info/time_between_train_steps": 0.005166053771972656, "step": 481} +{"info/global_step": 482, "train_info/time_within_train_step": 27.560839653015137, "step": 482} +{"train_info/time_between_train_steps": 0.005178928375244141, "step": 482} +{"info/global_step": 483, "train_info/time_within_train_step": 27.409789323806763, "step": 483} +{"train_info/time_between_train_steps": 0.005483150482177734, "step": 483} +{"info/global_step": 484, "train_info/time_within_train_step": 27.476816654205322, "step": 484} +{"train_info/time_between_train_steps": 0.005235910415649414, "step": 484} +{"info/global_step": 485, "train_info/time_within_train_step": 27.411752223968506, "step": 485} +{"train_info/time_between_train_steps": 0.0062677860260009766, "step": 485} +{"info/global_step": 486, "train_info/time_within_train_step": 27.386511087417603, "step": 486} +{"train_info/time_between_train_steps": 0.004887580871582031, "step": 486} +{"info/global_step": 487, "train_info/time_within_train_step": 27.40527319908142, "step": 487} +{"train_info/time_between_train_steps": 0.005247592926025391, "step": 487} +{"info/global_step": 488, "train_info/time_within_train_step": 27.41469120979309, "step": 488} +{"train_info/time_between_train_steps": 0.0051686763763427734, "step": 488} +{"info/global_step": 489, "train_info/time_within_train_step": 27.451054334640503, "step": 489} +{"train_info/time_between_train_steps": 0.005136966705322266, "step": 489} +{"info/global_step": 490, "train_info/time_within_train_step": 27.461090803146362, "step": 490} +{"train_info/time_between_train_steps": 0.01003575325012207, "step": 490} +{"info/global_step": 491, "train_info/time_within_train_step": 27.411916971206665, "step": 491} +{"train_info/time_between_train_steps": 0.010218143463134766, "step": 491} +{"info/global_step": 492, "train_info/time_within_train_step": 27.42366600036621, "step": 492} +{"train_info/time_between_train_steps": 0.00531458854675293, "step": 492} +{"info/global_step": 493, "train_info/time_within_train_step": 27.466278791427612, "step": 493} +{"train_info/time_between_train_steps": 0.005101442337036133, "step": 493} +{"info/global_step": 494, "train_info/time_within_train_step": 27.365323543548584, "step": 494} +{"train_info/time_between_train_steps": 0.004967212677001953, "step": 494} +{"info/global_step": 495, "train_info/time_within_train_step": 27.40524458885193, "step": 495} +{"train_info/time_between_train_steps": 0.005963802337646484, "step": 495} +{"info/global_step": 496, "train_info/time_within_train_step": 27.4531991481781, "step": 496} +{"train_info/time_between_train_steps": 0.008008241653442383, "step": 496} +{"info/global_step": 497, "train_info/time_within_train_step": 27.377198457717896, "step": 497} +{"train_info/time_between_train_steps": 0.0051631927490234375, "step": 497} +{"info/global_step": 498, "train_info/time_within_train_step": 27.41167664527893, "step": 498} +{"train_info/time_between_train_steps": 0.005323171615600586, "step": 498} +{"info/global_step": 499, "train_info/time_within_train_step": 27.397770881652832, "step": 499} +{"train_info/time_between_train_steps": 0.00503230094909668, "step": 499} +{"info/global_step": 500, "train_info/time_within_train_step": 27.400275468826294, "step": 500} +{"train_info": {"train_info/memory_allocated": 2038.609375, "train_info/memory_max_allocated": 
21606.1669921875, "train_info/memory_reserved": 24044.0, "train_info/memory_max_reserved": 24044.0, "_timestamp": 1732957366, "_runtime": 14004}, "step": 500} +{"logs": {"train/loss": 4.3775, "train/learning_rate": 0.00038888888888888887, "train/epoch": 17.02, "_timestamp": 1732957366, "_runtime": 14004}, "step": 500} +{"train_info/time_between_train_steps": 2.485002040863037, "step": 500} +{"info/global_step": 501, "train_info/time_within_train_step": 27.384706020355225, "step": 501} +{"train_info/time_between_train_steps": 0.005383014678955078, "step": 501} +{"info/global_step": 502, "train_info/time_within_train_step": 27.476587295532227, "step": 502} +{"train_info/time_between_train_steps": 0.005262136459350586, "step": 502} +{"info/global_step": 503, "train_info/time_within_train_step": 27.469440937042236, "step": 503} +{"train_info/time_between_train_steps": 0.005511283874511719, "step": 503} +{"info/global_step": 504, "train_info/time_within_train_step": 27.44100022315979, "step": 504} +{"train_info/time_between_train_steps": 0.005827426910400391, "step": 504} +{"train_info/time_between_train_steps": 14.387930393218994, "step": 504} +{"info/global_step": 505, "train_info/time_within_train_step": 27.425212383270264, "step": 505} +{"train_info/time_between_train_steps": 0.00502777099609375, "step": 505} +{"info/global_step": 506, "train_info/time_within_train_step": 27.52327823638916, "step": 506} +{"train_info/time_between_train_steps": 0.00502777099609375, "step": 506} +{"info/global_step": 507, "train_info/time_within_train_step": 27.351639986038208, "step": 507} +{"train_info/time_between_train_steps": 0.005405902862548828, "step": 507} +{"info/global_step": 508, "train_info/time_within_train_step": 27.49671196937561, "step": 508} +{"train_info/time_between_train_steps": 0.010040760040283203, "step": 508} +{"info/global_step": 509, "train_info/time_within_train_step": 27.58409285545349, "step": 509} +{"train_info/time_between_train_steps": 0.005413532257080078, "step": 509} +{"info/global_step": 510, "train_info/time_within_train_step": 27.53577947616577, "step": 510} +{"train_info/time_between_train_steps": 0.005278110504150391, "step": 510} +{"info/global_step": 511, "train_info/time_within_train_step": 27.36227560043335, "step": 511} +{"train_info/time_between_train_steps": 0.009971857070922852, "step": 511} +{"info/global_step": 512, "train_info/time_within_train_step": 27.504581689834595, "step": 512} +{"train_info/time_between_train_steps": 0.006666660308837891, "step": 512} +{"info/global_step": 513, "train_info/time_within_train_step": 27.429280281066895, "step": 513} +{"train_info/time_between_train_steps": 0.005113840103149414, "step": 513} +{"info/global_step": 514, "train_info/time_within_train_step": 27.37992525100708, "step": 514} +{"train_info/time_between_train_steps": 0.005255460739135742, "step": 514} +{"info/global_step": 515, "train_info/time_within_train_step": 27.3919997215271, "step": 515} +{"train_info/time_between_train_steps": 0.0049402713775634766, "step": 515} +{"info/global_step": 516, "train_info/time_within_train_step": 27.348214387893677, "step": 516} +{"train_info/time_between_train_steps": 0.005022764205932617, "step": 516} +{"info/global_step": 517, "train_info/time_within_train_step": 27.402419567108154, "step": 517} +{"train_info/time_between_train_steps": 0.010440349578857422, "step": 517} +{"info/global_step": 518, "train_info/time_within_train_step": 27.459072828292847, "step": 518} +{"train_info/time_between_train_steps": 
0.004982948303222656, "step": 518} +{"info/global_step": 519, "train_info/time_within_train_step": 27.425938844680786, "step": 519} +{"train_info/time_between_train_steps": 0.005048274993896484, "step": 519} +{"info/global_step": 520, "train_info/time_within_train_step": 27.373964548110962, "step": 520} +{"train_info/time_between_train_steps": 0.004998445510864258, "step": 520} +{"info/global_step": 521, "train_info/time_within_train_step": 27.462026357650757, "step": 521} +{"train_info/time_between_train_steps": 0.0051386356353759766, "step": 521} +{"info/global_step": 522, "train_info/time_within_train_step": 27.403549909591675, "step": 522} +{"train_info/time_between_train_steps": 0.0050296783447265625, "step": 522} +{"info/global_step": 523, "train_info/time_within_train_step": 27.34731435775757, "step": 523} +{"train_info/time_between_train_steps": 0.004960775375366211, "step": 523} +{"info/global_step": 524, "train_info/time_within_train_step": 27.481279611587524, "step": 524} +{"train_info/time_between_train_steps": 0.0071637630462646484, "step": 524} +{"info/global_step": 525, "train_info/time_within_train_step": 27.3782742023468, "step": 525} +{"train_info/time_between_train_steps": 0.009931564331054688, "step": 525} +{"info/global_step": 526, "train_info/time_within_train_step": 27.46433711051941, "step": 526} +{"train_info/time_between_train_steps": 0.005204439163208008, "step": 526} +{"info/global_step": 527, "train_info/time_within_train_step": 27.409592628479004, "step": 527} +{"train_info/time_between_train_steps": 0.0051653385162353516, "step": 527} +{"info/global_step": 528, "train_info/time_within_train_step": 27.472877264022827, "step": 528} +{"train_info/time_between_train_steps": 0.009920120239257812, "step": 528} +{"info/global_step": 529, "train_info/time_within_train_step": 27.443860054016113, "step": 529} +{"train_info/time_between_train_steps": 0.005280017852783203, "step": 529} +{"info/global_step": 530, "train_info/time_within_train_step": 27.62702178955078, "step": 530} +{"train_info/time_between_train_steps": 0.009839773178100586, "step": 530} +{"info/global_step": 531, "train_info/time_within_train_step": 27.414392948150635, "step": 531} +{"train_info/time_between_train_steps": 0.005599021911621094, "step": 531} +{"info/global_step": 532, "train_info/time_within_train_step": 27.44342350959778, "step": 532} +{"train_info/time_between_train_steps": 0.005867481231689453, "step": 532} +{"train_info/time_between_train_steps": 14.644532442092896, "step": 532} +{"info/global_step": 533, "train_info/time_within_train_step": 27.383628845214844, "step": 533} +{"train_info/time_between_train_steps": 0.009904861450195312, "step": 533} +{"info/global_step": 534, "train_info/time_within_train_step": 27.522279500961304, "step": 534} +{"train_info/time_between_train_steps": 0.006003141403198242, "step": 534} +{"info/global_step": 535, "train_info/time_within_train_step": 27.471596240997314, "step": 535} +{"train_info/time_between_train_steps": 0.005340099334716797, "step": 535} +{"info/global_step": 536, "train_info/time_within_train_step": 27.524942874908447, "step": 536} +{"train_info/time_between_train_steps": 0.005388975143432617, "step": 536} +{"info/global_step": 537, "train_info/time_within_train_step": 27.4409339427948, "step": 537} +{"train_info/time_between_train_steps": 0.005594730377197266, "step": 537} +{"info/global_step": 538, "train_info/time_within_train_step": 27.629844188690186, "step": 538} +{"train_info/time_between_train_steps": 0.006757259368896484, 
"step": 538} +{"info/global_step": 539, "train_info/time_within_train_step": 27.56245756149292, "step": 539} +{"train_info/time_between_train_steps": 0.017190217971801758, "step": 539} +{"info/global_step": 540, "train_info/time_within_train_step": 27.49901008605957, "step": 540} +{"train_info/time_between_train_steps": 0.007551908493041992, "step": 540} +{"info/global_step": 541, "train_info/time_within_train_step": 27.460780382156372, "step": 541} +{"train_info/time_between_train_steps": 0.0051386356353759766, "step": 541} +{"info/global_step": 542, "train_info/time_within_train_step": 27.417933702468872, "step": 542} +{"train_info/time_between_train_steps": 0.005185842514038086, "step": 542} +{"info/global_step": 543, "train_info/time_within_train_step": 27.407943964004517, "step": 543} +{"train_info/time_between_train_steps": 0.005177736282348633, "step": 543} +{"info/global_step": 544, "train_info/time_within_train_step": 27.389667510986328, "step": 544} +{"train_info/time_between_train_steps": 0.005255937576293945, "step": 544} +{"info/global_step": 545, "train_info/time_within_train_step": 27.4077889919281, "step": 545} +{"train_info/time_between_train_steps": 0.005246877670288086, "step": 545} +{"info/global_step": 546, "train_info/time_within_train_step": 27.441047430038452, "step": 546} +{"train_info/time_between_train_steps": 0.005202293395996094, "step": 546} +{"info/global_step": 547, "train_info/time_within_train_step": 27.379933834075928, "step": 547} +{"train_info/time_between_train_steps": 0.00526118278503418, "step": 547} +{"info/global_step": 548, "train_info/time_within_train_step": 27.34251117706299, "step": 548} +{"train_info/time_between_train_steps": 0.00505828857421875, "step": 548} +{"info/global_step": 549, "train_info/time_within_train_step": 27.377382516860962, "step": 549} +{"train_info/time_between_train_steps": 0.006333827972412109, "step": 549} +{"info/global_step": 550, "train_info/time_within_train_step": 27.36552882194519, "step": 550} +{"train_info": {"train_info/memory_allocated": 2038.609375, "train_info/memory_max_allocated": 21606.1669921875, "train_info/memory_reserved": 24044.0, "train_info/memory_max_reserved": 24044.0, "_timestamp": 1732958770, "_runtime": 15408}, "step": 550} +{"logs": {"train/loss": 4.2825, "train/learning_rate": 0.0003611111111111111, "train/epoch": 19.02, "_timestamp": 1732958770, "_runtime": 15408}, "step": 550} +{"train_info/time_between_train_steps": 0.0070493221282958984, "step": 550} +{"info/global_step": 551, "train_info/time_within_train_step": 27.37840461730957, "step": 551} +{"train_info/time_between_train_steps": 0.010366678237915039, "step": 551} +{"info/global_step": 552, "train_info/time_within_train_step": 27.355515718460083, "step": 552} +{"train_info/time_between_train_steps": 0.005095005035400391, "step": 552} +{"info/global_step": 553, "train_info/time_within_train_step": 27.358213186264038, "step": 553} +{"train_info/time_between_train_steps": 0.005438327789306641, "step": 553} +{"info/global_step": 554, "train_info/time_within_train_step": 27.35242199897766, "step": 554} +{"train_info/time_between_train_steps": 0.006025791168212891, "step": 554} +{"info/global_step": 555, "train_info/time_within_train_step": 27.463391304016113, "step": 555} +{"train_info/time_between_train_steps": 0.005140542984008789, "step": 555} +{"info/global_step": 556, "train_info/time_within_train_step": 27.36911368370056, "step": 556} +{"train_info/time_between_train_steps": 0.009996175765991211, "step": 556} +{"info/global_step": 
557, "train_info/time_within_train_step": 27.398763179779053, "step": 557} +{"train_info/time_between_train_steps": 0.0056612491607666016, "step": 557} +{"info/global_step": 558, "train_info/time_within_train_step": 27.50615882873535, "step": 558} +{"train_info/time_between_train_steps": 0.0053348541259765625, "step": 558} +{"info/global_step": 559, "train_info/time_within_train_step": 27.364774703979492, "step": 559} +{"train_info/time_between_train_steps": 0.005416154861450195, "step": 559} +{"info/global_step": 560, "train_info/time_within_train_step": 27.424458742141724, "step": 560} +{"train_info/time_between_train_steps": 0.005921602249145508, "step": 560} +{"train_info/time_between_train_steps": 14.552968502044678, "step": 560} +{"info/global_step": 561, "train_info/time_within_train_step": 27.38929271697998, "step": 561} +{"train_info/time_between_train_steps": 0.004904985427856445, "step": 561} +{"info/global_step": 562, "train_info/time_within_train_step": 27.529574632644653, "step": 562} +{"train_info/time_between_train_steps": 0.005636930465698242, "step": 562} +{"info/global_step": 563, "train_info/time_within_train_step": 27.37885308265686, "step": 563} +{"train_info/time_between_train_steps": 0.005209445953369141, "step": 563} +{"info/global_step": 564, "train_info/time_within_train_step": 27.55824041366577, "step": 564} +{"train_info/time_between_train_steps": 0.005246400833129883, "step": 564} +{"info/global_step": 565, "train_info/time_within_train_step": 27.432903051376343, "step": 565} +{"train_info/time_between_train_steps": 0.005396127700805664, "step": 565} +{"info/global_step": 566, "train_info/time_within_train_step": 27.559667348861694, "step": 566} +{"train_info/time_between_train_steps": 0.005360841751098633, "step": 566} +{"info/global_step": 567, "train_info/time_within_train_step": 27.39438557624817, "step": 567} +{"train_info/time_between_train_steps": 0.009409666061401367, "step": 567} +{"info/global_step": 568, "train_info/time_within_train_step": 27.432688236236572, "step": 568} +{"train_info/time_between_train_steps": 0.005276203155517578, "step": 568} +{"info/global_step": 569, "train_info/time_within_train_step": 27.413926362991333, "step": 569} +{"train_info/time_between_train_steps": 0.005208015441894531, "step": 569} +{"info/global_step": 570, "train_info/time_within_train_step": 27.517415046691895, "step": 570} +{"train_info/time_between_train_steps": 0.010483026504516602, "step": 570} +{"info/global_step": 571, "train_info/time_within_train_step": 27.386142253875732, "step": 571} +{"train_info/time_between_train_steps": 0.005025386810302734, "step": 571} +{"info/global_step": 572, "train_info/time_within_train_step": 27.38132119178772, "step": 572} +{"train_info/time_between_train_steps": 0.005093812942504883, "step": 572} +{"info/global_step": 573, "train_info/time_within_train_step": 27.35046648979187, "step": 573} +{"train_info/time_between_train_steps": 0.0051381587982177734, "step": 573} +{"info/global_step": 574, "train_info/time_within_train_step": 27.389291763305664, "step": 574} +{"train_info/time_between_train_steps": 0.005076408386230469, "step": 574} +{"info/global_step": 575, "train_info/time_within_train_step": 27.417850017547607, "step": 575} +{"train_info/time_between_train_steps": 0.005174160003662109, "step": 575} +{"info/global_step": 576, "train_info/time_within_train_step": 27.350409030914307, "step": 576} +{"train_info/time_between_train_steps": 0.00502324104309082, "step": 576} +{"info/global_step": 577, 
"train_info/time_within_train_step": 27.330822229385376, "step": 577} +{"train_info/time_between_train_steps": 0.004991292953491211, "step": 577} +{"info/global_step": 578, "train_info/time_within_train_step": 27.332847356796265, "step": 578} +{"train_info/time_between_train_steps": 0.010104894638061523, "step": 578} +{"info/global_step": 579, "train_info/time_within_train_step": 27.37904953956604, "step": 579} +{"train_info/time_between_train_steps": 0.00499415397644043, "step": 579} +{"info/global_step": 580, "train_info/time_within_train_step": 27.317119359970093, "step": 580} +{"train_info/time_between_train_steps": 0.004960775375366211, "step": 580} +{"info/global_step": 581, "train_info/time_within_train_step": 27.389820098876953, "step": 581} +{"train_info/time_between_train_steps": 0.005028486251831055, "step": 581} +{"info/global_step": 582, "train_info/time_within_train_step": 27.335578203201294, "step": 582} +{"train_info/time_between_train_steps": 0.005166053771972656, "step": 582} +{"info/global_step": 583, "train_info/time_within_train_step": 27.39159321784973, "step": 583} +{"train_info/time_between_train_steps": 0.005194664001464844, "step": 583} +{"info/global_step": 584, "train_info/time_within_train_step": 27.403791427612305, "step": 584} +{"train_info/time_between_train_steps": 0.005089282989501953, "step": 584} +{"info/global_step": 585, "train_info/time_within_train_step": 27.368080854415894, "step": 585} +{"train_info/time_between_train_steps": 0.00536799430847168, "step": 585} +{"info/global_step": 586, "train_info/time_within_train_step": 27.474808931350708, "step": 586} +{"train_info/time_between_train_steps": 0.00516200065612793, "step": 586} +{"info/global_step": 587, "train_info/time_within_train_step": 27.364625930786133, "step": 587} +{"train_info/time_between_train_steps": 0.005552530288696289, "step": 587} +{"info/global_step": 588, "train_info/time_within_train_step": 27.40463876724243, "step": 588} +{"train_info/time_between_train_steps": 0.011212825775146484, "step": 588} +{"train_info/time_between_train_steps": 14.26477313041687, "step": 588} +{"info/global_step": 589, "train_info/time_within_train_step": 27.367685317993164, "step": 589} +{"train_info/time_between_train_steps": 0.004888057708740234, "step": 589} +{"info/global_step": 590, "train_info/time_within_train_step": 27.475945949554443, "step": 590} +{"train_info/time_between_train_steps": 0.005239009857177734, "step": 590} +{"info/global_step": 591, "train_info/time_within_train_step": 27.35436749458313, "step": 591} +{"train_info/time_between_train_steps": 0.00494384765625, "step": 591} +{"info/global_step": 592, "train_info/time_within_train_step": 27.46240210533142, "step": 592} +{"train_info/time_between_train_steps": 0.004967689514160156, "step": 592} +{"info/global_step": 593, "train_info/time_within_train_step": 27.35247826576233, "step": 593} +{"train_info/time_between_train_steps": 0.005334138870239258, "step": 593} +{"info/global_step": 594, "train_info/time_within_train_step": 27.536285877227783, "step": 594} +{"train_info/time_between_train_steps": 0.0056040287017822266, "step": 594} +{"info/global_step": 595, "train_info/time_within_train_step": 27.36477565765381, "step": 595} +{"train_info/time_between_train_steps": 0.010229825973510742, "step": 595} +{"info/global_step": 596, "train_info/time_within_train_step": 27.420100212097168, "step": 596} +{"train_info/time_between_train_steps": 0.005244255065917969, "step": 596} +{"info/global_step": 597, 
"train_info/time_within_train_step": 27.386051416397095, "step": 597} +{"train_info/time_between_train_steps": 0.0048754215240478516, "step": 597} +{"info/global_step": 598, "train_info/time_within_train_step": 27.37226915359497, "step": 598} +{"train_info/time_between_train_steps": 0.0051190853118896484, "step": 598} +{"info/global_step": 599, "train_info/time_within_train_step": 27.338716506958008, "step": 599} +{"train_info/time_between_train_steps": 0.00516510009765625, "step": 599} +{"info/global_step": 600, "train_info/time_within_train_step": 27.378249645233154, "step": 600} +{"train_info": {"train_info/memory_allocated": 2038.609375, "train_info/memory_max_allocated": 21606.1669921875, "train_info/memory_reserved": 24044.0, "train_info/memory_max_reserved": 24044.0, "_timestamp": 1732960169, "_runtime": 16807}, "step": 600} +{"logs": {"train/loss": 4.1618, "train/learning_rate": 0.0003333333333333333, "train/epoch": 21.01, "_timestamp": 1732960169, "_runtime": 16807}, "step": 600} +{"train_info/time_between_train_steps": 2.599018096923828, "step": 600} +{"info/global_step": 601, "train_info/time_within_train_step": 27.456621885299683, "step": 601} +{"train_info/time_between_train_steps": 0.00521087646484375, "step": 601} +{"info/global_step": 602, "train_info/time_within_train_step": 27.342956066131592, "step": 602} +{"train_info/time_between_train_steps": 0.005101680755615234, "step": 602} +{"info/global_step": 603, "train_info/time_within_train_step": 27.39888858795166, "step": 603} +{"train_info/time_between_train_steps": 0.005205869674682617, "step": 603} +{"info/global_step": 604, "train_info/time_within_train_step": 27.350632905960083, "step": 604} +{"train_info/time_between_train_steps": 0.010150909423828125, "step": 604} +{"info/global_step": 605, "train_info/time_within_train_step": 27.38350224494934, "step": 605} +{"train_info/time_between_train_steps": 0.005211591720581055, "step": 605} +{"info/global_step": 606, "train_info/time_within_train_step": 27.369261026382446, "step": 606} +{"train_info/time_between_train_steps": 0.00512385368347168, "step": 606} +{"info/global_step": 607, "train_info/time_within_train_step": 27.339521884918213, "step": 607} +{"train_info/time_between_train_steps": 0.009253740310668945, "step": 607} +{"info/global_step": 608, "train_info/time_within_train_step": 27.376838207244873, "step": 608} +{"train_info/time_between_train_steps": 0.00508880615234375, "step": 608} +{"info/global_step": 609, "train_info/time_within_train_step": 27.357214212417603, "step": 609} +{"train_info/time_between_train_steps": 0.005151271820068359, "step": 609} +{"info/global_step": 610, "train_info/time_within_train_step": 27.38475513458252, "step": 610} +{"train_info/time_between_train_steps": 0.005220651626586914, "step": 610} +{"info/global_step": 611, "train_info/time_within_train_step": 27.3544020652771, "step": 611} +{"train_info/time_between_train_steps": 0.0052793025970458984, "step": 611} +{"info/global_step": 612, "train_info/time_within_train_step": 27.532174587249756, "step": 612} +{"train_info/time_between_train_steps": 0.005048513412475586, "step": 612} +{"info/global_step": 613, "train_info/time_within_train_step": 27.548478841781616, "step": 613} +{"train_info/time_between_train_steps": 0.005464076995849609, "step": 613} +{"info/global_step": 614, "train_info/time_within_train_step": 27.422115325927734, "step": 614} +{"train_info/time_between_train_steps": 0.010457515716552734, "step": 614} +{"info/global_step": 615, 
"train_info/time_within_train_step": 27.48460817337036, "step": 615} +{"train_info/time_between_train_steps": 0.010382652282714844, "step": 615} +{"info/global_step": 616, "train_info/time_within_train_step": 27.410476207733154, "step": 616} +{"train_info/time_between_train_steps": 0.0060732364654541016, "step": 616} +{"train_info/time_between_train_steps": 14.537981510162354, "step": 616} +{"info/global_step": 617, "train_info/time_within_train_step": 27.405377626419067, "step": 617} +{"train_info/time_between_train_steps": 0.015519857406616211, "step": 617} +{"info/global_step": 618, "train_info/time_within_train_step": 27.555547952651978, "step": 618} +{"train_info/time_between_train_steps": 0.005202293395996094, "step": 618} +{"info/global_step": 619, "train_info/time_within_train_step": 27.3924822807312, "step": 619} +{"train_info/time_between_train_steps": 0.005192756652832031, "step": 619} +{"info/global_step": 620, "train_info/time_within_train_step": 27.54568386077881, "step": 620} +{"train_info/time_between_train_steps": 0.00521087646484375, "step": 620} +{"info/global_step": 621, "train_info/time_within_train_step": 27.355827808380127, "step": 621} +{"train_info/time_between_train_steps": 0.005417585372924805, "step": 621} +{"info/global_step": 622, "train_info/time_within_train_step": 27.54618525505066, "step": 622} +{"train_info/time_between_train_steps": 0.00554203987121582, "step": 622} +{"info/global_step": 623, "train_info/time_within_train_step": 27.395650148391724, "step": 623} +{"train_info/time_between_train_steps": 0.010376453399658203, "step": 623} +{"info/global_step": 624, "train_info/time_within_train_step": 27.456217527389526, "step": 624} +{"train_info/time_between_train_steps": 0.006249904632568359, "step": 624} +{"info/global_step": 625, "train_info/time_within_train_step": 27.38831400871277, "step": 625} +{"train_info/time_between_train_steps": 0.004968166351318359, "step": 625} +{"info/global_step": 626, "train_info/time_within_train_step": 27.349529504776, "step": 626} +{"train_info/time_between_train_steps": 0.009943723678588867, "step": 626} +{"info/global_step": 627, "train_info/time_within_train_step": 27.354804515838623, "step": 627} +{"train_info/time_between_train_steps": 0.004930019378662109, "step": 627} +{"info/global_step": 628, "train_info/time_within_train_step": 27.39248752593994, "step": 628} +{"train_info/time_between_train_steps": 0.005110025405883789, "step": 628} +{"info/global_step": 629, "train_info/time_within_train_step": 27.33116364479065, "step": 629} +{"train_info/time_between_train_steps": 0.005043506622314453, "step": 629} +{"info/global_step": 630, "train_info/time_within_train_step": 27.34546446800232, "step": 630} +{"train_info/time_between_train_steps": 0.005862712860107422, "step": 630} +{"info/global_step": 631, "train_info/time_within_train_step": 27.352598428726196, "step": 631} +{"train_info/time_between_train_steps": 0.00507807731628418, "step": 631} +{"info/global_step": 632, "train_info/time_within_train_step": 27.47490167617798, "step": 632} +{"train_info/time_between_train_steps": 0.005623340606689453, "step": 632} +{"info/global_step": 633, "train_info/time_within_train_step": 27.349477529525757, "step": 633} +{"train_info/time_between_train_steps": 0.005139589309692383, "step": 633} +{"info/global_step": 634, "train_info/time_within_train_step": 27.39264726638794, "step": 634} +{"train_info/time_between_train_steps": 0.010056495666503906, "step": 634} +{"info/global_step": 635, "train_info/time_within_train_step": 
27.372870445251465, "step": 635} +{"train_info/time_between_train_steps": 0.010207414627075195, "step": 635} +{"info/global_step": 636, "train_info/time_within_train_step": 27.3834969997406, "step": 636} +{"train_info/time_between_train_steps": 0.014603614807128906, "step": 636} +{"info/global_step": 637, "train_info/time_within_train_step": 27.34462571144104, "step": 637} +{"train_info/time_between_train_steps": 0.0050449371337890625, "step": 637} +{"info/global_step": 638, "train_info/time_within_train_step": 27.34814214706421, "step": 638} +{"train_info/time_between_train_steps": 0.005126476287841797, "step": 638} +{"info/global_step": 639, "train_info/time_within_train_step": 27.387181520462036, "step": 639} +{"train_info/time_between_train_steps": 0.005090475082397461, "step": 639} +{"info/global_step": 640, "train_info/time_within_train_step": 27.373268365859985, "step": 640} +{"train_info/time_between_train_steps": 0.0050716400146484375, "step": 640} +{"info/global_step": 641, "train_info/time_within_train_step": 27.382614612579346, "step": 641} +{"train_info/time_between_train_steps": 0.005096435546875, "step": 641} +{"info/global_step": 642, "train_info/time_within_train_step": 27.39117980003357, "step": 642} +{"train_info/time_between_train_steps": 0.0070531368255615234, "step": 642} +{"info/global_step": 643, "train_info/time_within_train_step": 27.38698959350586, "step": 643} +{"train_info/time_between_train_steps": 0.005428791046142578, "step": 643} +{"info/global_step": 644, "train_info/time_within_train_step": 27.411521196365356, "step": 644} +{"train_info/time_between_train_steps": 0.005645751953125, "step": 644} +{"train_info/time_between_train_steps": 14.604989767074585, "step": 644} +{"info/global_step": 645, "train_info/time_within_train_step": 27.3782901763916, "step": 645} +{"train_info/time_between_train_steps": 0.004934787750244141, "step": 645} +{"info/global_step": 646, "train_info/time_within_train_step": 27.498882055282593, "step": 646} +{"train_info/time_between_train_steps": 0.004991769790649414, "step": 646} +{"info/global_step": 647, "train_info/time_within_train_step": 27.413102388381958, "step": 647} +{"train_info/time_between_train_steps": 0.004860877990722656, "step": 647} +{"info/global_step": 648, "train_info/time_within_train_step": 27.509307384490967, "step": 648} +{"train_info/time_between_train_steps": 0.005391359329223633, "step": 648} +{"info/global_step": 649, "train_info/time_within_train_step": 27.343773365020752, "step": 649} +{"train_info/time_between_train_steps": 0.0102386474609375, "step": 649} +{"info/global_step": 650, "train_info/time_within_train_step": 27.509143829345703, "step": 650} +{"train_info": {"train_info/memory_allocated": 2038.609375, "train_info/memory_max_allocated": 21606.1669921875, "train_info/memory_reserved": 24044.0, "train_info/memory_max_reserved": 24044.0, "_timestamp": 1732961572, "_runtime": 18210}, "step": 650} +{"logs": {"train/loss": 4.0581, "train/learning_rate": 0.00030555555555555555, "train/epoch": 23.0, "_timestamp": 1732961572, "_runtime": 18210}, "step": 650} +{"train_info/time_between_train_steps": 0.007131814956665039, "step": 650} +{"info/global_step": 651, "train_info/time_within_train_step": 27.420578718185425, "step": 651} +{"train_info/time_between_train_steps": 0.0052564144134521484, "step": 651} +{"info/global_step": 652, "train_info/time_within_train_step": 27.409841060638428, "step": 652} +{"train_info/time_between_train_steps": 0.005105257034301758, "step": 652} +{"info/global_step": 653, 
"train_info/time_within_train_step": 27.40621280670166, "step": 653} +{"train_info/time_between_train_steps": 0.004808902740478516, "step": 653} +{"info/global_step": 654, "train_info/time_within_train_step": 27.397806882858276, "step": 654} +{"train_info/time_between_train_steps": 0.0047969818115234375, "step": 654} +{"info/global_step": 655, "train_info/time_within_train_step": 27.33405089378357, "step": 655} +{"train_info/time_between_train_steps": 0.004885673522949219, "step": 655} +{"info/global_step": 656, "train_info/time_within_train_step": 27.327107906341553, "step": 656} +{"train_info/time_between_train_steps": 0.0049898624420166016, "step": 656} +{"info/global_step": 657, "train_info/time_within_train_step": 27.365680694580078, "step": 657} +{"train_info/time_between_train_steps": 0.008038520812988281, "step": 657} +{"info/global_step": 658, "train_info/time_within_train_step": 27.337098360061646, "step": 658} +{"train_info/time_between_train_steps": 0.0049631595611572266, "step": 658} +{"info/global_step": 659, "train_info/time_within_train_step": 27.39322781562805, "step": 659} +{"train_info/time_between_train_steps": 0.005121469497680664, "step": 659} +{"info/global_step": 660, "train_info/time_within_train_step": 27.330379962921143, "step": 660} +{"train_info/time_between_train_steps": 0.005181550979614258, "step": 660} +{"info/global_step": 661, "train_info/time_within_train_step": 27.388463497161865, "step": 661} +{"train_info/time_between_train_steps": 0.005184173583984375, "step": 661} +{"info/global_step": 662, "train_info/time_within_train_step": 27.407628774642944, "step": 662} +{"train_info/time_between_train_steps": 0.005131244659423828, "step": 662} +{"info/global_step": 663, "train_info/time_within_train_step": 27.477169036865234, "step": 663} +{"train_info/time_between_train_steps": 0.005189180374145508, "step": 663} +{"info/global_step": 664, "train_info/time_within_train_step": 27.35877537727356, "step": 664} +{"train_info/time_between_train_steps": 0.005164623260498047, "step": 664} +{"info/global_step": 665, "train_info/time_within_train_step": 27.3612380027771, "step": 665} +{"train_info/time_between_train_steps": 0.005251169204711914, "step": 665} +{"info/global_step": 666, "train_info/time_within_train_step": 27.35472273826599, "step": 666} +{"train_info/time_between_train_steps": 0.0051419734954833984, "step": 666} +{"info/global_step": 667, "train_info/time_within_train_step": 27.39219355583191, "step": 667} +{"train_info/time_between_train_steps": 0.0051157474517822266, "step": 667} +{"info/global_step": 668, "train_info/time_within_train_step": 27.35436248779297, "step": 668} +{"train_info/time_between_train_steps": 0.010170459747314453, "step": 668} +{"info/global_step": 669, "train_info/time_within_train_step": 27.351065635681152, "step": 669} +{"train_info/time_between_train_steps": 0.010416984558105469, "step": 669} +{"info/global_step": 670, "train_info/time_within_train_step": 27.393577098846436, "step": 670} +{"train_info/time_between_train_steps": 0.005252838134765625, "step": 670} +{"info/global_step": 671, "train_info/time_within_train_step": 27.429184913635254, "step": 671} +{"train_info/time_between_train_steps": 0.00558161735534668, "step": 671} +{"info/global_step": 672, "train_info/time_within_train_step": 27.39733362197876, "step": 672} +{"train_info/time_between_train_steps": 0.006259441375732422, "step": 672} +{"train_info/time_between_train_steps": 14.31748104095459, "step": 672} +{"info/global_step": 673, 
"train_info/time_within_train_step": 27.37033176422119, "step": 673} +{"train_info/time_between_train_steps": 0.005397319793701172, "step": 673} +{"info/global_step": 674, "train_info/time_within_train_step": 27.60486102104187, "step": 674} +{"train_info/time_between_train_steps": 0.010645389556884766, "step": 674} +{"info/global_step": 675, "train_info/time_within_train_step": 27.34864568710327, "step": 675} +{"train_info/time_between_train_steps": 0.005446672439575195, "step": 675} +{"info/global_step": 676, "train_info/time_within_train_step": 27.547207832336426, "step": 676} +{"train_info/time_between_train_steps": 0.005275249481201172, "step": 676} +{"info/global_step": 677, "train_info/time_within_train_step": 27.359049797058105, "step": 677} +{"train_info/time_between_train_steps": 0.005644798278808594, "step": 677} +{"info/global_step": 678, "train_info/time_within_train_step": 27.689385652542114, "step": 678} +{"train_info/time_between_train_steps": 0.0053691864013671875, "step": 678} +{"info/global_step": 679, "train_info/time_within_train_step": 27.45151972770691, "step": 679} +{"train_info/time_between_train_steps": 0.006032228469848633, "step": 679} +{"info/global_step": 680, "train_info/time_within_train_step": 27.479542016983032, "step": 680} +{"train_info/time_between_train_steps": 0.014070272445678711, "step": 680} +{"info/global_step": 681, "train_info/time_within_train_step": 27.4017436504364, "step": 681} +{"train_info/time_between_train_steps": 0.004921913146972656, "step": 681} +{"info/global_step": 682, "train_info/time_within_train_step": 27.330170154571533, "step": 682} +{"train_info/time_between_train_steps": 0.010355472564697266, "step": 682} +{"info/global_step": 683, "train_info/time_within_train_step": 27.36228632926941, "step": 683} +{"train_info/time_between_train_steps": 0.006216764450073242, "step": 683} +{"info/global_step": 684, "train_info/time_within_train_step": 27.348844528198242, "step": 684} +{"train_info/time_between_train_steps": 0.01011800765991211, "step": 684} +{"info/global_step": 685, "train_info/time_within_train_step": 27.35494828224182, "step": 685} +{"train_info/time_between_train_steps": 0.005117654800415039, "step": 685} +{"info/global_step": 686, "train_info/time_within_train_step": 27.35428762435913, "step": 686} +{"train_info/time_between_train_steps": 0.004920482635498047, "step": 686} +{"info/global_step": 687, "train_info/time_within_train_step": 27.37476944923401, "step": 687} +{"train_info/time_between_train_steps": 0.005395174026489258, "step": 687} +{"info/global_step": 688, "train_info/time_within_train_step": 27.352604866027832, "step": 688} +{"train_info/time_between_train_steps": 0.005022287368774414, "step": 688} +{"info/global_step": 689, "train_info/time_within_train_step": 27.361443758010864, "step": 689} +{"train_info/time_between_train_steps": 0.005703449249267578, "step": 689} +{"info/global_step": 690, "train_info/time_within_train_step": 27.38869047164917, "step": 690} +{"train_info/time_between_train_steps": 0.005482673645019531, "step": 690} +{"info/global_step": 691, "train_info/time_within_train_step": 27.365207195281982, "step": 691} +{"train_info/time_between_train_steps": 0.0050258636474609375, "step": 691} +{"info/global_step": 692, "train_info/time_within_train_step": 27.37930464744568, "step": 692} +{"train_info/time_between_train_steps": 0.00510716438293457, "step": 692} +{"info/global_step": 693, "train_info/time_within_train_step": 27.5777370929718, "step": 693} 
+{"train_info/time_between_train_steps": 0.005250453948974609, "step": 693} +{"info/global_step": 694, "train_info/time_within_train_step": 27.35702157020569, "step": 694} +{"train_info/time_between_train_steps": 0.005238771438598633, "step": 694} +{"info/global_step": 695, "train_info/time_within_train_step": 27.37230372428894, "step": 695} +{"train_info/time_between_train_steps": 0.005253791809082031, "step": 695} +{"info/global_step": 696, "train_info/time_within_train_step": 27.409749507904053, "step": 696} +{"train_info/time_between_train_steps": 0.0053102970123291016, "step": 696} +{"info/global_step": 697, "train_info/time_within_train_step": 27.36371088027954, "step": 697} +{"train_info/time_between_train_steps": 0.005620479583740234, "step": 697} +{"info/global_step": 698, "train_info/time_within_train_step": 27.39454174041748, "step": 698} +{"train_info/time_between_train_steps": 0.005480289459228516, "step": 698} +{"info/global_step": 699, "train_info/time_within_train_step": 27.44064164161682, "step": 699} +{"train_info/time_between_train_steps": 0.006682157516479492, "step": 699} +{"info/global_step": 700, "train_info/time_within_train_step": 27.39094638824463, "step": 700} +{"train_info": {"train_info/memory_allocated": 2038.609375, "train_info/memory_max_allocated": 21606.1669921875, "train_info/memory_reserved": 24044.0, "train_info/memory_max_reserved": 24044.0, "_timestamp": 1732962956, "_runtime": 19594}, "step": 700} +{"logs": {"train/loss": 3.9257, "train/learning_rate": 0.0002777777777777778, "train/epoch": 24.02, "_timestamp": 1732962956, "_runtime": 19594}, "step": 700} +{"train_info/time_between_train_steps": 2.425046682357788, "step": 700} +{"train_info/time_between_train_steps": 16.84418225288391, "step": 700} +{"info/global_step": 701, "train_info/time_within_train_step": 27.380696773529053, "step": 701} +{"train_info/time_between_train_steps": 0.017587900161743164, "step": 701} +{"info/global_step": 702, "train_info/time_within_train_step": 27.546189785003662, "step": 702} +{"train_info/time_between_train_steps": 0.005722761154174805, "step": 702} +{"info/global_step": 703, "train_info/time_within_train_step": 27.40714430809021, "step": 703} +{"train_info/time_between_train_steps": 0.00650477409362793, "step": 703} +{"info/global_step": 704, "train_info/time_within_train_step": 27.54293179512024, "step": 704} +{"train_info/time_between_train_steps": 0.01084136962890625, "step": 704} +{"info/global_step": 705, "train_info/time_within_train_step": 27.369733572006226, "step": 705} +{"train_info/time_between_train_steps": 0.005450725555419922, "step": 705} +{"info/global_step": 706, "train_info/time_within_train_step": 27.587786197662354, "step": 706} +{"train_info/time_between_train_steps": 0.005669593811035156, "step": 706} +{"info/global_step": 707, "train_info/time_within_train_step": 27.42255139350891, "step": 707} +{"train_info/time_between_train_steps": 0.00536346435546875, "step": 707} +{"info/global_step": 708, "train_info/time_within_train_step": 27.487499237060547, "step": 708} +{"train_info/time_between_train_steps": 0.006567239761352539, "step": 708} +{"info/global_step": 709, "train_info/time_within_train_step": 27.490625619888306, "step": 709} +{"train_info/time_between_train_steps": 0.005743265151977539, "step": 709} +{"info/global_step": 710, "train_info/time_within_train_step": 27.363353490829468, "step": 710} +{"train_info/time_between_train_steps": 0.01008915901184082, "step": 710} +{"info/global_step": 711, "train_info/time_within_train_step": 
27.350857257843018, "step": 711} +{"train_info/time_between_train_steps": 0.005217075347900391, "step": 711} +{"info/global_step": 712, "train_info/time_within_train_step": 27.35559368133545, "step": 712} +{"train_info/time_between_train_steps": 0.0050847530364990234, "step": 712} +{"info/global_step": 713, "train_info/time_within_train_step": 27.386650323867798, "step": 713} +{"train_info/time_between_train_steps": 0.00506901741027832, "step": 713} +{"info/global_step": 714, "train_info/time_within_train_step": 27.52525568008423, "step": 714} +{"train_info/time_between_train_steps": 0.005031108856201172, "step": 714} +{"info/global_step": 715, "train_info/time_within_train_step": 27.36549472808838, "step": 715} +{"train_info/time_between_train_steps": 0.010058403015136719, "step": 715} +{"info/global_step": 716, "train_info/time_within_train_step": 27.34807515144348, "step": 716} +{"train_info/time_between_train_steps": 0.005168437957763672, "step": 716} +{"info/global_step": 717, "train_info/time_within_train_step": 27.37185549736023, "step": 717} +{"train_info/time_between_train_steps": 0.004992961883544922, "step": 717} +{"info/global_step": 718, "train_info/time_within_train_step": 27.380661010742188, "step": 718} +{"train_info/time_between_train_steps": 0.005228519439697266, "step": 718} +{"info/global_step": 719, "train_info/time_within_train_step": 27.363251447677612, "step": 719} +{"train_info/time_between_train_steps": 0.004974365234375, "step": 719} +{"info/global_step": 720, "train_info/time_within_train_step": 27.365166187286377, "step": 720} +{"train_info/time_between_train_steps": 0.010049581527709961, "step": 720} +{"info/global_step": 721, "train_info/time_within_train_step": 27.328962326049805, "step": 721} +{"train_info/time_between_train_steps": 0.005110502243041992, "step": 721} +{"info/global_step": 722, "train_info/time_within_train_step": 27.360947608947754, "step": 722} +{"train_info/time_between_train_steps": 0.005385160446166992, "step": 722} +{"info/global_step": 723, "train_info/time_within_train_step": 27.351595878601074, "step": 723} +{"train_info/time_between_train_steps": 0.005160808563232422, "step": 723} +{"info/global_step": 724, "train_info/time_within_train_step": 27.464221954345703, "step": 724} +{"train_info/time_between_train_steps": 0.004992961883544922, "step": 724} +{"info/global_step": 725, "train_info/time_within_train_step": 27.34104013442993, "step": 725} +{"train_info/time_between_train_steps": 0.005204200744628906, "step": 725} +{"info/global_step": 726, "train_info/time_within_train_step": 27.371082067489624, "step": 726} +{"train_info/time_between_train_steps": 0.0054361820220947266, "step": 726} +{"info/global_step": 727, "train_info/time_within_train_step": 27.36631464958191, "step": 727} +{"train_info/time_between_train_steps": 0.010417461395263672, "step": 727} +{"info/global_step": 728, "train_info/time_within_train_step": 27.360222578048706, "step": 728} +{"train_info/time_between_train_steps": 0.011232852935791016, "step": 728} +{"train_info/time_between_train_steps": 14.469428300857544, "step": 728} +{"info/global_step": 729, "train_info/time_within_train_step": 27.376702547073364, "step": 729} +{"train_info/time_between_train_steps": 0.004949331283569336, "step": 729} +{"info/global_step": 730, "train_info/time_within_train_step": 27.57305908203125, "step": 730} +{"train_info/time_between_train_steps": 0.005612850189208984, "step": 730} +{"info/global_step": 731, "train_info/time_within_train_step": 27.47185182571411, "step": 
731} +{"train_info/time_between_train_steps": 0.005591869354248047, "step": 731} +{"info/global_step": 732, "train_info/time_within_train_step": 27.711939096450806, "step": 732} +{"train_info/time_between_train_steps": 0.00543212890625, "step": 732} +{"info/global_step": 733, "train_info/time_within_train_step": 27.365325689315796, "step": 733} +{"train_info/time_between_train_steps": 0.010821342468261719, "step": 733} +{"info/global_step": 734, "train_info/time_within_train_step": 27.602713584899902, "step": 734} +{"train_info/time_between_train_steps": 0.016039133071899414, "step": 734} +{"info/global_step": 735, "train_info/time_within_train_step": 27.45565962791443, "step": 735} +{"train_info/time_between_train_steps": 0.005585670471191406, "step": 735} +{"info/global_step": 736, "train_info/time_within_train_step": 27.486729621887207, "step": 736} +{"train_info/time_between_train_steps": 0.005262613296508789, "step": 736} +{"info/global_step": 737, "train_info/time_within_train_step": 27.40617537498474, "step": 737} +{"train_info/time_between_train_steps": 0.012117385864257812, "step": 737} +{"info/global_step": 738, "train_info/time_within_train_step": 27.357746362686157, "step": 738} +{"train_info/time_between_train_steps": 0.005010128021240234, "step": 738} +{"info/global_step": 739, "train_info/time_within_train_step": 27.381515979766846, "step": 739} +{"train_info/time_between_train_steps": 0.010099172592163086, "step": 739} +{"info/global_step": 740, "train_info/time_within_train_step": 27.538065433502197, "step": 740} +{"train_info/time_between_train_steps": 0.0050776004791259766, "step": 740} +{"info/global_step": 741, "train_info/time_within_train_step": 27.327643156051636, "step": 741} +{"train_info/time_between_train_steps": 0.005156755447387695, "step": 741} +{"info/global_step": 742, "train_info/time_within_train_step": 27.339282989501953, "step": 742} +{"train_info/time_between_train_steps": 0.0051670074462890625, "step": 742} +{"info/global_step": 743, "train_info/time_within_train_step": 27.34404420852661, "step": 743} +{"train_info/time_between_train_steps": 0.005107402801513672, "step": 743} +{"info/global_step": 744, "train_info/time_within_train_step": 27.3730525970459, "step": 744} +{"train_info/time_between_train_steps": 0.0055387020111083984, "step": 744} +{"info/global_step": 745, "train_info/time_within_train_step": 27.359381675720215, "step": 745} +{"train_info/time_between_train_steps": 0.010192394256591797, "step": 745} +{"info/global_step": 746, "train_info/time_within_train_step": 27.416288137435913, "step": 746} +{"train_info/time_between_train_steps": 0.005465507507324219, "step": 746} +{"info/global_step": 747, "train_info/time_within_train_step": 27.411457061767578, "step": 747} +{"train_info/time_between_train_steps": 0.010114192962646484, "step": 747} +{"info/global_step": 748, "train_info/time_within_train_step": 27.427094221115112, "step": 748} +{"train_info/time_between_train_steps": 0.005139350891113281, "step": 748} +{"info/global_step": 749, "train_info/time_within_train_step": 27.354456186294556, "step": 749} +{"train_info/time_between_train_steps": 0.005170583724975586, "step": 749} +{"info/global_step": 750, "train_info/time_within_train_step": 27.3542263507843, "step": 750} +{"train_info": {"train_info/memory_allocated": 2038.609375, "train_info/memory_max_allocated": 21606.1669921875, "train_info/memory_reserved": 24044.0, "train_info/memory_max_reserved": 24044.0, "_timestamp": 1732964359, "_runtime": 20997}, "step": 750} +{"logs": 
{"train/loss": 3.8778, "train/learning_rate": 0.00025, "train/epoch": 26.02, "_timestamp": 1732964359, "_runtime": 20997}, "step": 750} +{"train_info/time_between_train_steps": 0.006953716278076172, "step": 750} +{"info/global_step": 751, "train_info/time_within_train_step": 27.414233207702637, "step": 751} +{"train_info/time_between_train_steps": 0.0052947998046875, "step": 751} +{"info/global_step": 752, "train_info/time_within_train_step": 27.3979070186615, "step": 752} +{"train_info/time_between_train_steps": 0.005276203155517578, "step": 752} +{"info/global_step": 753, "train_info/time_within_train_step": 27.355117797851562, "step": 753} +{"train_info/time_between_train_steps": 0.005682706832885742, "step": 753} +{"info/global_step": 754, "train_info/time_within_train_step": 27.361659049987793, "step": 754} +{"train_info/time_between_train_steps": 0.0054204463958740234, "step": 754} +{"info/global_step": 755, "train_info/time_within_train_step": 27.497564554214478, "step": 755} +{"train_info/time_between_train_steps": 0.005414485931396484, "step": 755} +{"info/global_step": 756, "train_info/time_within_train_step": 27.427738666534424, "step": 756} +{"train_info/time_between_train_steps": 0.0057713985443115234, "step": 756} +{"train_info/time_between_train_steps": 14.235167741775513, "step": 756} +{"info/global_step": 757, "train_info/time_within_train_step": 27.33083176612854, "step": 757} +{"train_info/time_between_train_steps": 0.004965066909790039, "step": 757} +{"info/global_step": 758, "train_info/time_within_train_step": 27.467857599258423, "step": 758} +{"train_info/time_between_train_steps": 0.005036354064941406, "step": 758} +{"info/global_step": 759, "train_info/time_within_train_step": 27.352946519851685, "step": 759} +{"train_info/time_between_train_steps": 0.005499839782714844, "step": 759} +{"info/global_step": 760, "train_info/time_within_train_step": 27.528178453445435, "step": 760} +{"train_info/time_between_train_steps": 0.005380868911743164, "step": 760} +{"info/global_step": 761, "train_info/time_within_train_step": 27.333766222000122, "step": 761} +{"train_info/time_between_train_steps": 0.005671262741088867, "step": 761} +{"info/global_step": 762, "train_info/time_within_train_step": 27.512177228927612, "step": 762} +{"train_info/time_between_train_steps": 0.005530595779418945, "step": 762} +{"info/global_step": 763, "train_info/time_within_train_step": 27.37748122215271, "step": 763} +{"train_info/time_between_train_steps": 0.005500078201293945, "step": 763} +{"info/global_step": 764, "train_info/time_within_train_step": 27.569599866867065, "step": 764} +{"train_info/time_between_train_steps": 0.0052623748779296875, "step": 764} +{"info/global_step": 765, "train_info/time_within_train_step": 27.345947742462158, "step": 765} +{"train_info/time_between_train_steps": 0.004923582077026367, "step": 765} +{"info/global_step": 766, "train_info/time_within_train_step": 27.33440136909485, "step": 766} +{"train_info/time_between_train_steps": 0.0054585933685302734, "step": 766} +{"info/global_step": 767, "train_info/time_within_train_step": 27.34323215484619, "step": 767} +{"train_info/time_between_train_steps": 0.005098581314086914, "step": 767} +{"info/global_step": 768, "train_info/time_within_train_step": 27.464144468307495, "step": 768} +{"train_info/time_between_train_steps": 0.005094766616821289, "step": 768} +{"info/global_step": 769, "train_info/time_within_train_step": 27.328343629837036, "step": 769} +{"train_info/time_between_train_steps": 
0.005251646041870117, "step": 769} +{"info/global_step": 770, "train_info/time_within_train_step": 27.42699146270752, "step": 770} +{"train_info/time_between_train_steps": 0.005306720733642578, "step": 770} +{"info/global_step": 771, "train_info/time_within_train_step": 27.327731132507324, "step": 771} +{"train_info/time_between_train_steps": 0.004957914352416992, "step": 771} +{"info/global_step": 772, "train_info/time_within_train_step": 27.349334716796875, "step": 772} +{"train_info/time_between_train_steps": 0.005115509033203125, "step": 772} +{"info/global_step": 773, "train_info/time_within_train_step": 27.382564306259155, "step": 773} +{"train_info/time_between_train_steps": 0.005224704742431641, "step": 773} +{"info/global_step": 774, "train_info/time_within_train_step": 27.38450312614441, "step": 774} +{"train_info/time_between_train_steps": 0.0052073001861572266, "step": 774} +{"info/global_step": 775, "train_info/time_within_train_step": 27.344525575637817, "step": 775} +{"train_info/time_between_train_steps": 0.006057024002075195, "step": 775} +{"info/global_step": 776, "train_info/time_within_train_step": 27.323281049728394, "step": 776} +{"train_info/time_between_train_steps": 0.009691953659057617, "step": 776} +{"info/global_step": 777, "train_info/time_within_train_step": 27.435433387756348, "step": 777} +{"train_info/time_between_train_steps": 0.0051610469818115234, "step": 777} +{"info/global_step": 778, "train_info/time_within_train_step": 27.38002872467041, "step": 778} +{"train_info/time_between_train_steps": 0.005282402038574219, "step": 778} +{"info/global_step": 779, "train_info/time_within_train_step": 27.356847524642944, "step": 779} +{"train_info/time_between_train_steps": 0.005198001861572266, "step": 779} +{"info/global_step": 780, "train_info/time_within_train_step": 27.352084636688232, "step": 780} +{"train_info/time_between_train_steps": 0.005215883255004883, "step": 780} +{"info/global_step": 781, "train_info/time_within_train_step": 27.34290385246277, "step": 781} +{"train_info/time_between_train_steps": 0.005255699157714844, "step": 781} +{"info/global_step": 782, "train_info/time_within_train_step": 27.336783170700073, "step": 782} +{"train_info/time_between_train_steps": 0.005148649215698242, "step": 782} +{"info/global_step": 783, "train_info/time_within_train_step": 27.357669591903687, "step": 783} +{"train_info/time_between_train_steps": 0.0057926177978515625, "step": 783} +{"info/global_step": 784, "train_info/time_within_train_step": 27.418681144714355, "step": 784} +{"train_info/time_between_train_steps": 0.01157069206237793, "step": 784} +{"train_info/time_between_train_steps": 14.427357196807861, "step": 784} +{"info/global_step": 785, "train_info/time_within_train_step": 27.3478524684906, "step": 785} +{"train_info/time_between_train_steps": 0.00496363639831543, "step": 785} +{"info/global_step": 786, "train_info/time_within_train_step": 27.6759135723114, "step": 786} +{"train_info/time_between_train_steps": 0.0053751468658447266, "step": 786} +{"info/global_step": 787, "train_info/time_within_train_step": 27.499250173568726, "step": 787} +{"train_info/time_between_train_steps": 0.005117893218994141, "step": 787} +{"info/global_step": 788, "train_info/time_within_train_step": 27.559462308883667, "step": 788} +{"train_info/time_between_train_steps": 0.01042628288269043, "step": 788} +{"info/global_step": 789, "train_info/time_within_train_step": 27.3852641582489, "step": 789} +{"train_info/time_between_train_steps": 0.005395412445068359, "step": 
789} +{"info/global_step": 790, "train_info/time_within_train_step": 27.497934818267822, "step": 790} +{"train_info/time_between_train_steps": 0.005202770233154297, "step": 790} +{"info/global_step": 791, "train_info/time_within_train_step": 27.368998050689697, "step": 791} +{"train_info/time_between_train_steps": 0.005331277847290039, "step": 791} +{"info/global_step": 792, "train_info/time_within_train_step": 27.50881314277649, "step": 792} +{"train_info/time_between_train_steps": 0.00518035888671875, "step": 792} +{"info/global_step": 793, "train_info/time_within_train_step": 27.46592426300049, "step": 793} +{"train_info/time_between_train_steps": 0.010557174682617188, "step": 793} +{"info/global_step": 794, "train_info/time_within_train_step": 27.355913162231445, "step": 794} +{"train_info/time_between_train_steps": 0.005257368087768555, "step": 794} +{"info/global_step": 795, "train_info/time_within_train_step": 27.441570043563843, "step": 795} +{"train_info/time_between_train_steps": 0.005052328109741211, "step": 795} +{"info/global_step": 796, "train_info/time_within_train_step": 27.38957691192627, "step": 796} +{"train_info/time_between_train_steps": 0.005200386047363281, "step": 796} +{"info/global_step": 797, "train_info/time_within_train_step": 27.37576651573181, "step": 797} +{"train_info/time_between_train_steps": 0.005215644836425781, "step": 797} +{"info/global_step": 798, "train_info/time_within_train_step": 27.547805070877075, "step": 798} +{"train_info/time_between_train_steps": 0.005106687545776367, "step": 798} +{"info/global_step": 799, "train_info/time_within_train_step": 27.390848636627197, "step": 799} +{"train_info/time_between_train_steps": 0.005239725112915039, "step": 799} +{"info/global_step": 800, "train_info/time_within_train_step": 27.38970136642456, "step": 800} +{"train_info": {"train_info/memory_allocated": 2038.609375, "train_info/memory_max_allocated": 21606.1669921875, "train_info/memory_reserved": 24044.0, "train_info/memory_max_reserved": 24044.0, "_timestamp": 1732965758, "_runtime": 22396}, "step": 800} +{"logs": {"train/loss": 3.8028, "train/learning_rate": 0.00022222222222222218, "train/epoch": 28.01, "_timestamp": 1732965758, "_runtime": 22396}, "step": 800} +{"train_info/time_between_train_steps": 2.888195037841797, "step": 800} +{"info/global_step": 801, "train_info/time_within_train_step": 27.458361625671387, "step": 801} +{"train_info/time_between_train_steps": 0.005483150482177734, "step": 801} +{"info/global_step": 802, "train_info/time_within_train_step": 27.40594244003296, "step": 802} +{"train_info/time_between_train_steps": 0.00518488883972168, "step": 802} +{"info/global_step": 803, "train_info/time_within_train_step": 27.370822429656982, "step": 803} +{"train_info/time_between_train_steps": 0.005179405212402344, "step": 803} +{"info/global_step": 804, "train_info/time_within_train_step": 27.398735284805298, "step": 804} +{"train_info/time_between_train_steps": 0.01000070571899414, "step": 804} +{"info/global_step": 805, "train_info/time_within_train_step": 27.388243198394775, "step": 805} +{"train_info/time_between_train_steps": 0.005217075347900391, "step": 805} +{"info/global_step": 806, "train_info/time_within_train_step": 27.33607316017151, "step": 806} +{"train_info/time_between_train_steps": 0.005208730697631836, "step": 806} +{"info/global_step": 807, "train_info/time_within_train_step": 27.54135251045227, "step": 807} +{"train_info/time_between_train_steps": 0.005084514617919922, "step": 807} +{"info/global_step": 808, 
"train_info/time_within_train_step": 27.36804485321045, "step": 808} +{"train_info/time_between_train_steps": 0.005198478698730469, "step": 808} +{"info/global_step": 809, "train_info/time_within_train_step": 27.333654165267944, "step": 809} +{"train_info/time_between_train_steps": 0.005574464797973633, "step": 809} +{"info/global_step": 810, "train_info/time_within_train_step": 27.410329818725586, "step": 810} +{"train_info/time_between_train_steps": 0.005300998687744141, "step": 810} +{"info/global_step": 811, "train_info/time_within_train_step": 27.7903470993042, "step": 811} +{"train_info/time_between_train_steps": 0.015695571899414062, "step": 811} +{"info/global_step": 812, "train_info/time_within_train_step": 27.6545467376709, "step": 812} +{"train_info/time_between_train_steps": 0.00585174560546875, "step": 812} +{"train_info/time_between_train_steps": 14.533042907714844, "step": 812} +{"info/global_step": 813, "train_info/time_within_train_step": 27.318543672561646, "step": 813} +{"train_info/time_between_train_steps": 0.004889249801635742, "step": 813} +{"info/global_step": 814, "train_info/time_within_train_step": 27.607649326324463, "step": 814} +{"train_info/time_between_train_steps": 0.005011558532714844, "step": 814} +{"info/global_step": 815, "train_info/time_within_train_step": 27.50578999519348, "step": 815} +{"train_info/time_between_train_steps": 0.004913330078125, "step": 815} +{"info/global_step": 816, "train_info/time_within_train_step": 27.617432594299316, "step": 816} +{"train_info/time_between_train_steps": 0.00542759895324707, "step": 816} +{"info/global_step": 817, "train_info/time_within_train_step": 27.40007710456848, "step": 817} +{"train_info/time_between_train_steps": 0.0055179595947265625, "step": 817} +{"info/global_step": 818, "train_info/time_within_train_step": 27.477508783340454, "step": 818} +{"train_info/time_between_train_steps": 0.005110263824462891, "step": 818} +{"info/global_step": 819, "train_info/time_within_train_step": 27.392616510391235, "step": 819} +{"train_info/time_between_train_steps": 0.005469322204589844, "step": 819} +{"info/global_step": 820, "train_info/time_within_train_step": 27.47352385520935, "step": 820} +{"train_info/time_between_train_steps": 0.005502223968505859, "step": 820} +{"info/global_step": 821, "train_info/time_within_train_step": 27.383405685424805, "step": 821} +{"train_info/time_between_train_steps": 0.005223751068115234, "step": 821} +{"info/global_step": 822, "train_info/time_within_train_step": 27.36199450492859, "step": 822} +{"train_info/time_between_train_steps": 0.00912928581237793, "step": 822} +{"info/global_step": 823, "train_info/time_within_train_step": 27.342135667800903, "step": 823} +{"train_info/time_between_train_steps": 0.004922151565551758, "step": 823} +{"info/global_step": 824, "train_info/time_within_train_step": 27.344820022583008, "step": 824} +{"train_info/time_between_train_steps": 0.0049591064453125, "step": 824} +{"info/global_step": 825, "train_info/time_within_train_step": 27.337641716003418, "step": 825} +{"train_info/time_between_train_steps": 0.004906654357910156, "step": 825} +{"info/global_step": 826, "train_info/time_within_train_step": 27.372469902038574, "step": 826} +{"train_info/time_between_train_steps": 0.009962320327758789, "step": 826} +{"info/global_step": 827, "train_info/time_within_train_step": 27.377406358718872, "step": 827} +{"train_info/time_between_train_steps": 0.005043745040893555, "step": 827} +{"info/global_step": 828, "train_info/time_within_train_step": 
27.36602807044983, "step": 828} +{"train_info/time_between_train_steps": 0.005284786224365234, "step": 828} +{"info/global_step": 829, "train_info/time_within_train_step": 27.417072534561157, "step": 829} +{"train_info/time_between_train_steps": 0.005449771881103516, "step": 829} +{"info/global_step": 830, "train_info/time_within_train_step": 27.398725032806396, "step": 830} +{"train_info/time_between_train_steps": 0.005226612091064453, "step": 830} +{"info/global_step": 831, "train_info/time_within_train_step": 27.415894508361816, "step": 831} +{"train_info/time_between_train_steps": 0.0053675174713134766, "step": 831} +{"info/global_step": 832, "train_info/time_within_train_step": 27.502097129821777, "step": 832} +{"train_info/time_between_train_steps": 0.006281375885009766, "step": 832} +{"info/global_step": 833, "train_info/time_within_train_step": 27.414133548736572, "step": 833} +{"train_info/time_between_train_steps": 0.006181240081787109, "step": 833} +{"info/global_step": 834, "train_info/time_within_train_step": 27.37258267402649, "step": 834} +{"train_info/time_between_train_steps": 0.0053064823150634766, "step": 834} +{"info/global_step": 835, "train_info/time_within_train_step": 27.391571760177612, "step": 835} +{"train_info/time_between_train_steps": 0.010097742080688477, "step": 835} +{"info/global_step": 836, "train_info/time_within_train_step": 27.365676641464233, "step": 836} +{"train_info/time_between_train_steps": 0.00526118278503418, "step": 836} +{"info/global_step": 837, "train_info/time_within_train_step": 27.391035795211792, "step": 837} +{"train_info/time_between_train_steps": 0.010727405548095703, "step": 837} +{"info/global_step": 838, "train_info/time_within_train_step": 27.367578268051147, "step": 838} +{"train_info/time_between_train_steps": 0.0052721500396728516, "step": 838} +{"info/global_step": 839, "train_info/time_within_train_step": 27.41707420349121, "step": 839} +{"train_info/time_between_train_steps": 0.014554500579833984, "step": 839} +{"info/global_step": 840, "train_info/time_within_train_step": 27.412628412246704, "step": 840} +{"train_info/time_between_train_steps": 0.00544428825378418, "step": 840} +{"train_info/time_between_train_steps": 14.393564224243164, "step": 840} +{"info/global_step": 841, "train_info/time_within_train_step": 27.34972333908081, "step": 841} +{"train_info/time_between_train_steps": 0.010002374649047852, "step": 841} +{"info/global_step": 842, "train_info/time_within_train_step": 27.43246030807495, "step": 842} +{"train_info/time_between_train_steps": 0.010016441345214844, "step": 842} +{"info/global_step": 843, "train_info/time_within_train_step": 27.349250555038452, "step": 843} +{"train_info/time_between_train_steps": 0.004910707473754883, "step": 843} +{"info/global_step": 844, "train_info/time_within_train_step": 27.488919734954834, "step": 844} +{"train_info/time_between_train_steps": 0.010004043579101562, "step": 844} +{"info/global_step": 845, "train_info/time_within_train_step": 27.392198085784912, "step": 845} +{"train_info/time_between_train_steps": 0.00797271728515625, "step": 845} +{"info/global_step": 846, "train_info/time_within_train_step": 27.510498523712158, "step": 846} +{"train_info/time_between_train_steps": 0.011119365692138672, "step": 846} +{"info/global_step": 847, "train_info/time_within_train_step": 27.649535655975342, "step": 847} +{"train_info/time_between_train_steps": 0.009504318237304688, "step": 847} +{"info/global_step": 848, "train_info/time_within_train_step": 27.520158529281616, 
"step": 848} +{"train_info/time_between_train_steps": 0.009366989135742188, "step": 848} +{"info/global_step": 849, "train_info/time_within_train_step": 27.452059030532837, "step": 849} +{"train_info/time_between_train_steps": 0.004864931106567383, "step": 849} +{"info/global_step": 850, "train_info/time_within_train_step": 27.348304510116577, "step": 850} +{"train_info": {"train_info/memory_allocated": 2038.609375, "train_info/memory_max_allocated": 21606.1669921875, "train_info/memory_reserved": 24044.0, "train_info/memory_max_reserved": 24044.0, "_timestamp": 1732967162, "_runtime": 23800}, "step": 850} +{"logs": {"train/loss": 3.7326, "train/learning_rate": 0.00019444444444444443, "train/epoch": 30.01, "_timestamp": 1732967162, "_runtime": 23800}, "step": 850} +{"train_info/time_between_train_steps": 0.012651205062866211, "step": 850} +{"info/global_step": 851, "train_info/time_within_train_step": 27.347227096557617, "step": 851} +{"train_info/time_between_train_steps": 0.004942178726196289, "step": 851} +{"info/global_step": 852, "train_info/time_within_train_step": 27.343910455703735, "step": 852} +{"train_info/time_between_train_steps": 0.004936933517456055, "step": 852} +{"info/global_step": 853, "train_info/time_within_train_step": 27.35377025604248, "step": 853} +{"train_info/time_between_train_steps": 0.005097627639770508, "step": 853} +{"info/global_step": 854, "train_info/time_within_train_step": 27.35520362854004, "step": 854} +{"train_info/time_between_train_steps": 0.005089521408081055, "step": 854} +{"info/global_step": 855, "train_info/time_within_train_step": 27.34908175468445, "step": 855} +{"train_info/time_between_train_steps": 0.0051364898681640625, "step": 855} +{"info/global_step": 856, "train_info/time_within_train_step": 27.370990991592407, "step": 856} +{"train_info/time_between_train_steps": 0.004928112030029297, "step": 856} +{"info/global_step": 857, "train_info/time_within_train_step": 27.347988605499268, "step": 857} +{"train_info/time_between_train_steps": 0.006231784820556641, "step": 857} +{"info/global_step": 858, "train_info/time_within_train_step": 27.386400938034058, "step": 858} +{"train_info/time_between_train_steps": 0.004980325698852539, "step": 858} +{"info/global_step": 859, "train_info/time_within_train_step": 27.34672999382019, "step": 859} +{"train_info/time_between_train_steps": 0.005990505218505859, "step": 859} +{"info/global_step": 860, "train_info/time_within_train_step": 27.366875886917114, "step": 860} +{"train_info/time_between_train_steps": 0.004991769790649414, "step": 860} +{"info/global_step": 861, "train_info/time_within_train_step": 27.35469102859497, "step": 861} +{"train_info/time_between_train_steps": 0.005511045455932617, "step": 861} +{"info/global_step": 862, "train_info/time_within_train_step": 27.351519346237183, "step": 862} +{"train_info/time_between_train_steps": 0.0053005218505859375, "step": 862} +{"info/global_step": 863, "train_info/time_within_train_step": 27.4708149433136, "step": 863} +{"train_info/time_between_train_steps": 0.005052328109741211, "step": 863} +{"info/global_step": 864, "train_info/time_within_train_step": 27.362329483032227, "step": 864} +{"train_info/time_between_train_steps": 0.005230903625488281, "step": 864} +{"info/global_step": 865, "train_info/time_within_train_step": 27.37685251235962, "step": 865} +{"train_info/time_between_train_steps": 0.005389213562011719, "step": 865} +{"info/global_step": 866, "train_info/time_within_train_step": 27.35214114189148, "step": 866} 
+{"train_info/time_between_train_steps": 0.0053479671478271484, "step": 866} +{"info/global_step": 867, "train_info/time_within_train_step": 27.365079641342163, "step": 867} +{"train_info/time_between_train_steps": 0.005354404449462891, "step": 867} +{"info/global_step": 868, "train_info/time_within_train_step": 27.58150625228882, "step": 868} +{"train_info/time_between_train_steps": 0.005744218826293945, "step": 868} +{"train_info/time_between_train_steps": 14.422609567642212, "step": 868} +{"info/global_step": 869, "train_info/time_within_train_step": 27.36876630783081, "step": 869} +{"train_info/time_between_train_steps": 0.004946470260620117, "step": 869} +{"info/global_step": 870, "train_info/time_within_train_step": 27.41708779335022, "step": 870} +{"train_info/time_between_train_steps": 0.004919290542602539, "step": 870} +{"info/global_step": 871, "train_info/time_within_train_step": 27.302520990371704, "step": 871} +{"train_info/time_between_train_steps": 0.009950399398803711, "step": 871} +{"info/global_step": 872, "train_info/time_within_train_step": 27.437844038009644, "step": 872} +{"train_info/time_between_train_steps": 0.004929065704345703, "step": 872} +{"info/global_step": 873, "train_info/time_within_train_step": 27.315258502960205, "step": 873} +{"train_info/time_between_train_steps": 0.004910469055175781, "step": 873} +{"info/global_step": 874, "train_info/time_within_train_step": 27.43929433822632, "step": 874} +{"train_info/time_between_train_steps": 0.005082130432128906, "step": 874} +{"info/global_step": 875, "train_info/time_within_train_step": 27.34148120880127, "step": 875} +{"train_info/time_between_train_steps": 0.0051958560943603516, "step": 875} +{"info/global_step": 876, "train_info/time_within_train_step": 27.452691078186035, "step": 876} +{"train_info/time_between_train_steps": 0.005104541778564453, "step": 876} +{"info/global_step": 877, "train_info/time_within_train_step": 27.353844165802002, "step": 877} +{"train_info/time_between_train_steps": 0.005061149597167969, "step": 877} +{"info/global_step": 878, "train_info/time_within_train_step": 27.517982244491577, "step": 878} +{"train_info/time_between_train_steps": 0.004964590072631836, "step": 878} +{"info/global_step": 879, "train_info/time_within_train_step": 27.366658926010132, "step": 879} +{"train_info/time_between_train_steps": 0.009972095489501953, "step": 879} +{"info/global_step": 880, "train_info/time_within_train_step": 27.34122633934021, "step": 880} +{"train_info/time_between_train_steps": 0.0049021244049072266, "step": 880} +{"info/global_step": 881, "train_info/time_within_train_step": 27.324838876724243, "step": 881} +{"train_info/time_between_train_steps": 0.004854679107666016, "step": 881} +{"info/global_step": 882, "train_info/time_within_train_step": 27.329590797424316, "step": 882} +{"train_info/time_between_train_steps": 0.005093097686767578, "step": 882} +{"info/global_step": 883, "train_info/time_within_train_step": 27.328425884246826, "step": 883} +{"train_info/time_between_train_steps": 0.004930973052978516, "step": 883} +{"info/global_step": 884, "train_info/time_within_train_step": 27.338297367095947, "step": 884} +{"train_info/time_between_train_steps": 0.005030393600463867, "step": 884} +{"info/global_step": 885, "train_info/time_within_train_step": 27.368547677993774, "step": 885} +{"train_info/time_between_train_steps": 0.0050334930419921875, "step": 885} +{"info/global_step": 886, "train_info/time_within_train_step": 27.323160409927368, "step": 886} 
+{"train_info/time_between_train_steps": 0.004974365234375, "step": 886} +{"info/global_step": 887, "train_info/time_within_train_step": 27.344984531402588, "step": 887} +{"train_info/time_between_train_steps": 0.0049991607666015625, "step": 887} +{"info/global_step": 888, "train_info/time_within_train_step": 27.33691096305847, "step": 888} +{"train_info/time_between_train_steps": 0.00493621826171875, "step": 888} +{"info/global_step": 889, "train_info/time_within_train_step": 27.482663869857788, "step": 889} +{"train_info/time_between_train_steps": 0.009117603302001953, "step": 889} +{"info/global_step": 890, "train_info/time_within_train_step": 27.405107259750366, "step": 890} +{"train_info/time_between_train_steps": 0.012383460998535156, "step": 890} +{"info/global_step": 891, "train_info/time_within_train_step": 27.453049182891846, "step": 891} +{"train_info/time_between_train_steps": 0.005243778228759766, "step": 891} +{"info/global_step": 892, "train_info/time_within_train_step": 27.390873193740845, "step": 892} +{"train_info/time_between_train_steps": 0.0058209896087646484, "step": 892} +{"info/global_step": 893, "train_info/time_within_train_step": 27.400230407714844, "step": 893} +{"train_info/time_between_train_steps": 0.0055310726165771484, "step": 893} +{"info/global_step": 894, "train_info/time_within_train_step": 27.45716166496277, "step": 894} +{"train_info/time_between_train_steps": 0.005184650421142578, "step": 894} +{"info/global_step": 895, "train_info/time_within_train_step": 27.37423610687256, "step": 895} +{"train_info/time_between_train_steps": 0.005436420440673828, "step": 895} +{"info/global_step": 896, "train_info/time_within_train_step": 27.42351269721985, "step": 896} +{"train_info/time_between_train_steps": 0.005644798278808594, "step": 896} +{"train_info/time_between_train_steps": 14.550994634628296, "step": 896} +{"info/global_step": 897, "train_info/time_within_train_step": 27.393073797225952, "step": 897} +{"train_info/time_between_train_steps": 0.005125999450683594, "step": 897} +{"info/global_step": 898, "train_info/time_within_train_step": 27.49731683731079, "step": 898} +{"train_info/time_between_train_steps": 0.005347013473510742, "step": 898} +{"info/global_step": 899, "train_info/time_within_train_step": 27.3831045627594, "step": 899} +{"train_info/time_between_train_steps": 0.005128145217895508, "step": 899} +{"info/global_step": 900, "train_info/time_within_train_step": 27.521154403686523, "step": 900} +{"train_info": {"train_info/memory_allocated": 2038.609375, "train_info/memory_max_allocated": 21606.1669921875, "train_info/memory_reserved": 24044.0, "train_info/memory_max_reserved": 24044.0, "_timestamp": 1732968560, "_runtime": 25198}, "step": 900} +{"logs": {"train/loss": 3.6708, "train/learning_rate": 0.00016666666666666666, "train/epoch": 32.0, "_timestamp": 1732968560, "_runtime": 25198}, "step": 900} +{"train_info/time_between_train_steps": 2.504204511642456, "step": 900} +{"info/global_step": 901, "train_info/time_within_train_step": 27.369636297225952, "step": 901} +{"train_info/time_between_train_steps": 0.00515294075012207, "step": 901} +{"info/global_step": 902, "train_info/time_within_train_step": 27.50188374519348, "step": 902} +{"train_info/time_between_train_steps": 0.005043745040893555, "step": 902} +{"info/global_step": 903, "train_info/time_within_train_step": 27.376912593841553, "step": 903} +{"train_info/time_between_train_steps": 0.005427837371826172, "step": 903} +{"info/global_step": 904, 
"train_info/time_within_train_step": 27.44025468826294, "step": 904} +{"train_info/time_between_train_steps": 0.005164146423339844, "step": 904} +{"info/global_step": 905, "train_info/time_within_train_step": 27.372788667678833, "step": 905} +{"train_info/time_between_train_steps": 0.010108470916748047, "step": 905} +{"info/global_step": 906, "train_info/time_within_train_step": 27.384625673294067, "step": 906} +{"train_info/time_between_train_steps": 0.005033254623413086, "step": 906} +{"info/global_step": 907, "train_info/time_within_train_step": 27.343531847000122, "step": 907} +{"train_info/time_between_train_steps": 0.0050716400146484375, "step": 907} +{"info/global_step": 908, "train_info/time_within_train_step": 27.34908390045166, "step": 908} +{"train_info/time_between_train_steps": 0.005089998245239258, "step": 908} +{"info/global_step": 909, "train_info/time_within_train_step": 27.47504472732544, "step": 909} +{"train_info/time_between_train_steps": 0.005209207534790039, "step": 909} +{"info/global_step": 910, "train_info/time_within_train_step": 27.345189809799194, "step": 910} +{"train_info/time_between_train_steps": 0.005023002624511719, "step": 910} +{"info/global_step": 911, "train_info/time_within_train_step": 27.35489320755005, "step": 911} +{"train_info/time_between_train_steps": 0.005216836929321289, "step": 911} +{"info/global_step": 912, "train_info/time_within_train_step": 27.381537199020386, "step": 912} +{"train_info/time_between_train_steps": 0.01003575325012207, "step": 912} +{"info/global_step": 913, "train_info/time_within_train_step": 27.37947702407837, "step": 913} +{"train_info/time_between_train_steps": 0.005039691925048828, "step": 913} +{"info/global_step": 914, "train_info/time_within_train_step": 27.37859082221985, "step": 914} +{"train_info/time_between_train_steps": 0.005069255828857422, "step": 914} +{"info/global_step": 915, "train_info/time_within_train_step": 27.373042106628418, "step": 915} +{"train_info/time_between_train_steps": 0.00525665283203125, "step": 915} +{"info/global_step": 916, "train_info/time_within_train_step": 27.43205690383911, "step": 916} +{"train_info/time_between_train_steps": 0.005013942718505859, "step": 916} +{"info/global_step": 917, "train_info/time_within_train_step": 27.365741968154907, "step": 917} +{"train_info/time_between_train_steps": 0.006127119064331055, "step": 917} +{"info/global_step": 918, "train_info/time_within_train_step": 27.33978271484375, "step": 918} +{"train_info/time_between_train_steps": 0.005108356475830078, "step": 918} +{"info/global_step": 919, "train_info/time_within_train_step": 27.37563943862915, "step": 919} +{"train_info/time_between_train_steps": 0.0049860477447509766, "step": 919} +{"info/global_step": 920, "train_info/time_within_train_step": 27.40314817428589, "step": 920} +{"train_info/time_between_train_steps": 0.010256052017211914, "step": 920} +{"info/global_step": 921, "train_info/time_within_train_step": 27.35720705986023, "step": 921} +{"train_info/time_between_train_steps": 0.0054035186767578125, "step": 921} +{"info/global_step": 922, "train_info/time_within_train_step": 27.432254552841187, "step": 922} +{"train_info/time_between_train_steps": 0.005255937576293945, "step": 922} +{"info/global_step": 923, "train_info/time_within_train_step": 27.348121404647827, "step": 923} +{"train_info/time_between_train_steps": 0.00516819953918457, "step": 923} +{"info/global_step": 924, "train_info/time_within_train_step": 27.498533010482788, "step": 924} 
+{"train_info/time_between_train_steps": 0.006676435470581055, "step": 924} +{"train_info/time_between_train_steps": 14.585500240325928, "step": 924} +{"info/global_step": 925, "train_info/time_within_train_step": 27.317455530166626, "step": 925} +{"train_info/time_between_train_steps": 0.00489044189453125, "step": 925} +{"info/global_step": 926, "train_info/time_within_train_step": 27.43659734725952, "step": 926} +{"train_info/time_between_train_steps": 0.0049097537994384766, "step": 926} +{"info/global_step": 927, "train_info/time_within_train_step": 27.34400773048401, "step": 927} +{"train_info/time_between_train_steps": 0.00499415397644043, "step": 927} +{"info/global_step": 928, "train_info/time_within_train_step": 27.538593530654907, "step": 928} +{"train_info/time_between_train_steps": 0.013259410858154297, "step": 928} +{"info/global_step": 929, "train_info/time_within_train_step": 27.392775297164917, "step": 929} +{"train_info/time_between_train_steps": 0.005485057830810547, "step": 929} +{"info/global_step": 930, "train_info/time_within_train_step": 27.544299602508545, "step": 930} +{"train_info/time_between_train_steps": 0.005384683609008789, "step": 930} +{"info/global_step": 931, "train_info/time_within_train_step": 27.39079761505127, "step": 931} +{"train_info/time_between_train_steps": 0.005626201629638672, "step": 931} +{"info/global_step": 932, "train_info/time_within_train_step": 27.456755876541138, "step": 932} +{"train_info/time_between_train_steps": 0.005229949951171875, "step": 932} +{"info/global_step": 933, "train_info/time_within_train_step": 27.40570831298828, "step": 933} +{"train_info/time_between_train_steps": 0.0052907466888427734, "step": 933} +{"info/global_step": 934, "train_info/time_within_train_step": 27.32296323776245, "step": 934} +{"train_info/time_between_train_steps": 0.009825706481933594, "step": 934} +{"info/global_step": 935, "train_info/time_within_train_step": 27.330177068710327, "step": 935} +{"train_info/time_between_train_steps": 0.0050048828125, "step": 935} +{"info/global_step": 936, "train_info/time_within_train_step": 27.342533349990845, "step": 936} +{"train_info/time_between_train_steps": 0.010121822357177734, "step": 936} +{"info/global_step": 937, "train_info/time_within_train_step": 27.403208255767822, "step": 937} +{"train_info/time_between_train_steps": 0.005312204360961914, "step": 937} +{"info/global_step": 938, "train_info/time_within_train_step": 27.3336820602417, "step": 938} +{"train_info/time_between_train_steps": 0.005172014236450195, "step": 938} +{"info/global_step": 939, "train_info/time_within_train_step": 27.617981672286987, "step": 939} +{"train_info/time_between_train_steps": 0.004990100860595703, "step": 939} +{"info/global_step": 940, "train_info/time_within_train_step": 27.424041271209717, "step": 940} +{"train_info/time_between_train_steps": 0.005048513412475586, "step": 940} +{"info/global_step": 941, "train_info/time_within_train_step": 27.352221965789795, "step": 941} +{"train_info/time_between_train_steps": 0.01587843894958496, "step": 941} +{"info/global_step": 942, "train_info/time_within_train_step": 27.36846160888672, "step": 942} +{"train_info/time_between_train_steps": 0.004963874816894531, "step": 942} +{"info/global_step": 943, "train_info/time_within_train_step": 27.395625829696655, "step": 943} +{"train_info/time_between_train_steps": 0.005022287368774414, "step": 943} +{"info/global_step": 944, "train_info/time_within_train_step": 27.380393266677856, "step": 944} 
+{"train_info/time_between_train_steps": 0.013918161392211914, "step": 944} +{"info/global_step": 945, "train_info/time_within_train_step": 27.34657120704651, "step": 945} +{"train_info/time_between_train_steps": 0.005243539810180664, "step": 945} +{"info/global_step": 946, "train_info/time_within_train_step": 27.377633810043335, "step": 946} +{"train_info/time_between_train_steps": 0.005080223083496094, "step": 946} +{"info/global_step": 947, "train_info/time_within_train_step": 27.33180284500122, "step": 947} +{"train_info/time_between_train_steps": 0.004938602447509766, "step": 947} +{"info/global_step": 948, "train_info/time_within_train_step": 27.35926127433777, "step": 948} +{"train_info/time_between_train_steps": 0.004989147186279297, "step": 948} +{"info/global_step": 949, "train_info/time_within_train_step": 27.345019817352295, "step": 949} +{"train_info/time_between_train_steps": 0.0051822662353515625, "step": 949} +{"info/global_step": 950, "train_info/time_within_train_step": 27.35751724243164, "step": 950} +{"train_info": {"train_info/memory_allocated": 2038.609375, "train_info/memory_max_allocated": 21606.1669921875, "train_info/memory_reserved": 24044.0, "train_info/memory_max_reserved": 24044.0, "_timestamp": 1732969947, "_runtime": 26585}, "step": 950} +{"logs": {"train/loss": 3.5837, "train/learning_rate": 0.0001388888888888889, "train/epoch": 33.02, "_timestamp": 1732969947, "_runtime": 26585}, "step": 950} +{"train_info/time_between_train_steps": 0.007183074951171875, "step": 950} +{"info/global_step": 951, "train_info/time_within_train_step": 27.38088035583496, "step": 951} +{"train_info/time_between_train_steps": 0.005270242691040039, "step": 951} +{"info/global_step": 952, "train_info/time_within_train_step": 27.416763067245483, "step": 952} +{"train_info/time_between_train_steps": 0.0057010650634765625, "step": 952} +{"train_info/time_between_train_steps": 14.243439197540283, "step": 952} +{"info/global_step": 953, "train_info/time_within_train_step": 27.345094203948975, "step": 953} +{"train_info/time_between_train_steps": 0.004894733428955078, "step": 953} +{"info/global_step": 954, "train_info/time_within_train_step": 27.471033573150635, "step": 954} +{"train_info/time_between_train_steps": 0.004967927932739258, "step": 954} +{"info/global_step": 955, "train_info/time_within_train_step": 27.47834277153015, "step": 955} +{"train_info/time_between_train_steps": 0.00495457649230957, "step": 955} +{"info/global_step": 956, "train_info/time_within_train_step": 27.45175576210022, "step": 956} +{"train_info/time_between_train_steps": 0.0049440860748291016, "step": 956} +{"info/global_step": 957, "train_info/time_within_train_step": 27.36608862876892, "step": 957} +{"train_info/time_between_train_steps": 0.004984378814697266, "step": 957} +{"info/global_step": 958, "train_info/time_within_train_step": 27.50960683822632, "step": 958} +{"train_info/time_between_train_steps": 0.005217075347900391, "step": 958} +{"info/global_step": 959, "train_info/time_within_train_step": 27.38129997253418, "step": 959} +{"train_info/time_between_train_steps": 0.007818222045898438, "step": 959} +{"info/global_step": 960, "train_info/time_within_train_step": 27.45744514465332, "step": 960} +{"train_info/time_between_train_steps": 0.010600566864013672, "step": 960} +{"info/global_step": 961, "train_info/time_within_train_step": 27.405247688293457, "step": 961} +{"train_info/time_between_train_steps": 0.004958629608154297, "step": 961} +{"info/global_step": 962, 
"train_info/time_within_train_step": 27.384331226348877, "step": 962} +{"train_info/time_between_train_steps": 0.004931211471557617, "step": 962} +{"info/global_step": 963, "train_info/time_within_train_step": 27.384477615356445, "step": 963} +{"train_info/time_between_train_steps": 0.004881858825683594, "step": 963} +{"info/global_step": 964, "train_info/time_within_train_step": 27.32884430885315, "step": 964} +{"train_info/time_between_train_steps": 0.0048830509185791016, "step": 964} +{"info/global_step": 965, "train_info/time_within_train_step": 27.335591554641724, "step": 965} +{"train_info/time_between_train_steps": 0.004885435104370117, "step": 965} +{"info/global_step": 966, "train_info/time_within_train_step": 27.3916597366333, "step": 966} +{"train_info/time_between_train_steps": 0.0060694217681884766, "step": 966} +{"info/global_step": 967, "train_info/time_within_train_step": 27.392586708068848, "step": 967} +{"train_info/time_between_train_steps": 0.010121345520019531, "step": 967} +{"info/global_step": 968, "train_info/time_within_train_step": 27.36298155784607, "step": 968} +{"train_info/time_between_train_steps": 0.005220174789428711, "step": 968} +{"info/global_step": 969, "train_info/time_within_train_step": 27.355966091156006, "step": 969} +{"train_info/time_between_train_steps": 0.004956483840942383, "step": 969} +{"info/global_step": 970, "train_info/time_within_train_step": 27.344318389892578, "step": 970} +{"train_info/time_between_train_steps": 0.004971027374267578, "step": 970} +{"info/global_step": 971, "train_info/time_within_train_step": 27.502308130264282, "step": 971} +{"train_info/time_between_train_steps": 0.004973411560058594, "step": 971} +{"info/global_step": 972, "train_info/time_within_train_step": 27.37076234817505, "step": 972} +{"train_info/time_between_train_steps": 0.004889965057373047, "step": 972} +{"info/global_step": 973, "train_info/time_within_train_step": 27.45344042778015, "step": 973} +{"train_info/time_between_train_steps": 0.010263204574584961, "step": 973} +{"info/global_step": 974, "train_info/time_within_train_step": 27.34323811531067, "step": 974} +{"train_info/time_between_train_steps": 0.005143165588378906, "step": 974} +{"info/global_step": 975, "train_info/time_within_train_step": 27.347572803497314, "step": 975} +{"train_info/time_between_train_steps": 0.00500035285949707, "step": 975} +{"info/global_step": 976, "train_info/time_within_train_step": 27.371214866638184, "step": 976} +{"train_info/time_between_train_steps": 0.005049943923950195, "step": 976} +{"info/global_step": 977, "train_info/time_within_train_step": 27.347673892974854, "step": 977} +{"train_info/time_between_train_steps": 0.00637364387512207, "step": 977} +{"info/global_step": 978, "train_info/time_within_train_step": 27.40570878982544, "step": 978} +{"train_info/time_between_train_steps": 0.00533747673034668, "step": 978} +{"info/global_step": 979, "train_info/time_within_train_step": 27.375263929367065, "step": 979} +{"train_info/time_between_train_steps": 0.0055561065673828125, "step": 979} +{"info/global_step": 980, "train_info/time_within_train_step": 27.38311243057251, "step": 980} +{"train_info/time_between_train_steps": 0.005987644195556641, "step": 980} +{"train_info/time_between_train_steps": 14.50845980644226, "step": 980} +{"info/global_step": 981, "train_info/time_within_train_step": 27.34166669845581, "step": 981} +{"train_info/time_between_train_steps": 0.004921436309814453, "step": 981} +{"info/global_step": 982, 
"train_info/time_within_train_step": 27.512688636779785, "step": 982} +{"train_info/time_between_train_steps": 0.005130290985107422, "step": 982} +{"info/global_step": 983, "train_info/time_within_train_step": 27.35180687904358, "step": 983} +{"train_info/time_between_train_steps": 0.004912137985229492, "step": 983} +{"info/global_step": 984, "train_info/time_within_train_step": 27.4419527053833, "step": 984} +{"train_info/time_between_train_steps": 0.005189657211303711, "step": 984} +{"info/global_step": 985, "train_info/time_within_train_step": 27.349148511886597, "step": 985} +{"train_info/time_between_train_steps": 0.004900455474853516, "step": 985} +{"info/global_step": 986, "train_info/time_within_train_step": 27.549946069717407, "step": 986} +{"train_info/time_between_train_steps": 0.004963397979736328, "step": 986} +{"info/global_step": 987, "train_info/time_within_train_step": 27.511156797409058, "step": 987} +{"train_info/time_between_train_steps": 0.009853839874267578, "step": 987} +{"info/global_step": 988, "train_info/time_within_train_step": 27.490976333618164, "step": 988} +{"train_info/time_between_train_steps": 0.005087852478027344, "step": 988} +{"info/global_step": 989, "train_info/time_within_train_step": 27.349066734313965, "step": 989} +{"train_info/time_between_train_steps": 0.004901409149169922, "step": 989} +{"info/global_step": 990, "train_info/time_within_train_step": 27.482524871826172, "step": 990} +{"train_info/time_between_train_steps": 0.009389162063598633, "step": 990} +{"info/global_step": 991, "train_info/time_within_train_step": 27.452837228775024, "step": 991} +{"train_info/time_between_train_steps": 0.004931449890136719, "step": 991} +{"info/global_step": 992, "train_info/time_within_train_step": 27.3118999004364, "step": 992} +{"train_info/time_between_train_steps": 0.0049669742584228516, "step": 992} +{"info/global_step": 993, "train_info/time_within_train_step": 27.31308913230896, "step": 993} +{"train_info/time_between_train_steps": 0.009610414505004883, "step": 993} +{"info/global_step": 994, "train_info/time_within_train_step": 27.32130765914917, "step": 994} +{"train_info/time_between_train_steps": 0.004881620407104492, "step": 994} +{"info/global_step": 995, "train_info/time_within_train_step": 27.333659648895264, "step": 995} +{"train_info/time_between_train_steps": 0.004900455474853516, "step": 995} +{"info/global_step": 996, "train_info/time_within_train_step": 27.31041669845581, "step": 996} +{"train_info/time_between_train_steps": 0.005030632019042969, "step": 996} +{"info/global_step": 997, "train_info/time_within_train_step": 27.370150804519653, "step": 997} +{"train_info/time_between_train_steps": 0.016013383865356445, "step": 997} +{"info/global_step": 998, "train_info/time_within_train_step": 27.370003938674927, "step": 998} +{"train_info/time_between_train_steps": 0.014433622360229492, "step": 998} +{"info/global_step": 999, "train_info/time_within_train_step": 27.332770824432373, "step": 999} +{"train_info/time_between_train_steps": 0.0050199031829833984, "step": 999} +{"info/global_step": 1000, "train_info/time_within_train_step": 27.320319414138794, "step": 1000} +{"train_info": {"train_info/memory_allocated": 2038.609375, "train_info/memory_max_allocated": 21606.1669921875, "train_info/memory_reserved": 24044.0, "train_info/memory_max_reserved": 24044.0, "_timestamp": 1732971346, "_runtime": 27984}, "step": 1000} +{"logs": {"train/loss": 3.5664, "train/learning_rate": 0.00011111111111111109, "train/epoch": 35.02, "_timestamp": 
1732971346, "_runtime": 27984}, "step": 1000} +{"train_info": {"train_info/memory_allocated": 2038.609375, "train_info/memory_max_allocated": 21606.1669921875, "train_info/memory_reserved": 29668.0, "train_info/memory_max_reserved": 29668.0, "_timestamp": 1732971348, "_runtime": 27986}, "step": 1000} +{"logs": {"eval/loss": 4.406866073608398, "eval/runtime": 1.7783, "eval/samples_per_second": 52.296, "eval/steps_per_second": 3.374, "train/epoch": 35.02, "_timestamp": 1732971348, "_runtime": 27986}, "step": 1000} +{"train_info": {"train_info/memory_allocated": 2038.609375, "train_info/memory_max_allocated": 21606.1669921875, "train_info/memory_reserved": 29668.0, "train_info/memory_max_reserved": 29668.0, "_timestamp": 1732971348, "_runtime": 27986}, "step": 1000} +{"logs": {"eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_loss": 4.406866073608398, "eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_ppl": 82.01204064414016, "eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_runtime": 1.7783, "eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_samples_per_second": 52.296, "train/epoch": 35.02, "_timestamp": 1732971348, "_runtime": 27986}, "step": 1000} +{"train_info/time_between_train_steps": 4.7773919105529785, "step": 1000} +{"info/global_step": 1001, "train_info/time_within_train_step": 27.431208610534668, "step": 1001} +{"train_info/time_between_train_steps": 0.0048139095306396484, "step": 1001} +{"info/global_step": 1002, "train_info/time_within_train_step": 27.41180682182312, "step": 1002} +{"train_info/time_between_train_steps": 0.0048215389251708984, "step": 1002} +{"info/global_step": 1003, "train_info/time_within_train_step": 27.32108187675476, "step": 1003} +{"train_info/time_between_train_steps": 0.004839420318603516, "step": 1003} +{"info/global_step": 1004, "train_info/time_within_train_step": 27.320451736450195, "step": 1004} +{"train_info/time_between_train_steps": 0.005010128021240234, "step": 1004} +{"info/global_step": 1005, "train_info/time_within_train_step": 27.364281177520752, "step": 1005} +{"train_info/time_between_train_steps": 0.005682468414306641, "step": 1005} +{"info/global_step": 1006, "train_info/time_within_train_step": 27.414071798324585, "step": 1006} +{"train_info/time_between_train_steps": 0.00531005859375, "step": 1006} +{"info/global_step": 1007, "train_info/time_within_train_step": 27.372719049453735, "step": 1007} +{"train_info/time_between_train_steps": 0.005356788635253906, "step": 1007} +{"info/global_step": 1008, "train_info/time_within_train_step": 27.397179126739502, "step": 1008} +{"train_info/time_between_train_steps": 0.0056934356689453125, "step": 1008} +{"train_info/time_between_train_steps": 14.53112530708313, "step": 1008} +{"info/global_step": 1009, "train_info/time_within_train_step": 27.411036491394043, "step": 1009} +{"train_info/time_between_train_steps": 0.005026817321777344, "step": 1009} +{"info/global_step": 1010, "train_info/time_within_train_step": 27.49738073348999, "step": 1010} +{"train_info/time_between_train_steps": 0.004975795745849609, "step": 1010} +{"info/global_step": 1011, "train_info/time_within_train_step": 27.374053716659546, "step": 1011} +{"train_info/time_between_train_steps": 0.00488734245300293, "step": 1011} +{"info/global_step": 1012, "train_info/time_within_train_step": 27.500240802764893, "step": 1012} +{"train_info/time_between_train_steps": 0.005369424819946289, "step": 
1012} +{"info/global_step": 1013, "train_info/time_within_train_step": 27.397661447525024, "step": 1013} +{"train_info/time_between_train_steps": 0.005234718322753906, "step": 1013} +{"info/global_step": 1014, "train_info/time_within_train_step": 27.506496906280518, "step": 1014} +{"train_info/time_between_train_steps": 0.005211591720581055, "step": 1014} +{"info/global_step": 1015, "train_info/time_within_train_step": 27.367841720581055, "step": 1015} +{"train_info/time_between_train_steps": 0.006323575973510742, "step": 1015} +{"info/global_step": 1016, "train_info/time_within_train_step": 27.428961753845215, "step": 1016} +{"train_info/time_between_train_steps": 0.0050694942474365234, "step": 1016} +{"info/global_step": 1017, "train_info/time_within_train_step": 27.454017400741577, "step": 1017} +{"train_info/time_between_train_steps": 0.00477290153503418, "step": 1017} +{"info/global_step": 1018, "train_info/time_within_train_step": 27.33742642402649, "step": 1018} +{"train_info/time_between_train_steps": 0.004911661148071289, "step": 1018} +{"info/global_step": 1019, "train_info/time_within_train_step": 27.345220804214478, "step": 1019} +{"train_info/time_between_train_steps": 0.004978179931640625, "step": 1019} +{"info/global_step": 1020, "train_info/time_within_train_step": 27.332785844802856, "step": 1020} +{"train_info/time_between_train_steps": 0.005082368850708008, "step": 1020} +{"info/global_step": 1021, "train_info/time_within_train_step": 27.33430767059326, "step": 1021} +{"train_info/time_between_train_steps": 0.010217905044555664, "step": 1021} +{"info/global_step": 1022, "train_info/time_within_train_step": 27.324040174484253, "step": 1022} +{"train_info/time_between_train_steps": 0.009771108627319336, "step": 1022} +{"info/global_step": 1023, "train_info/time_within_train_step": 27.33608317375183, "step": 1023} +{"train_info/time_between_train_steps": 0.004914283752441406, "step": 1023} +{"info/global_step": 1024, "train_info/time_within_train_step": 27.3274085521698, "step": 1024} +{"train_info/time_between_train_steps": 0.0050144195556640625, "step": 1024} +{"info/global_step": 1025, "train_info/time_within_train_step": 27.33891010284424, "step": 1025} +{"train_info/time_between_train_steps": 0.005043506622314453, "step": 1025} +{"info/global_step": 1026, "train_info/time_within_train_step": 27.341954469680786, "step": 1026} +{"train_info/time_between_train_steps": 0.005011796951293945, "step": 1026} +{"info/global_step": 1027, "train_info/time_within_train_step": 27.385697841644287, "step": 1027} +{"train_info/time_between_train_steps": 0.004920005798339844, "step": 1027} +{"info/global_step": 1028, "train_info/time_within_train_step": 27.322439908981323, "step": 1028} +{"train_info/time_between_train_steps": 0.00494074821472168, "step": 1028} +{"info/global_step": 1029, "train_info/time_within_train_step": 27.329452514648438, "step": 1029} +{"train_info/time_between_train_steps": 0.010192394256591797, "step": 1029} +{"info/global_step": 1030, "train_info/time_within_train_step": 27.367571353912354, "step": 1030} +{"train_info/time_between_train_steps": 0.009891510009765625, "step": 1030} +{"info/global_step": 1031, "train_info/time_within_train_step": 27.35323739051819, "step": 1031} +{"train_info/time_between_train_steps": 0.0051004886627197266, "step": 1031} +{"info/global_step": 1032, "train_info/time_within_train_step": 27.443329095840454, "step": 1032} +{"train_info/time_between_train_steps": 0.005286216735839844, "step": 1032} +{"info/global_step": 1033, 
"train_info/time_within_train_step": 27.439502000808716, "step": 1033} +{"train_info/time_between_train_steps": 0.005066394805908203, "step": 1033} +{"info/global_step": 1034, "train_info/time_within_train_step": 27.339248418807983, "step": 1034} +{"train_info/time_between_train_steps": 0.008729696273803711, "step": 1034} +{"info/global_step": 1035, "train_info/time_within_train_step": 27.34734344482422, "step": 1035} +{"train_info/time_between_train_steps": 0.010203838348388672, "step": 1035} +{"info/global_step": 1036, "train_info/time_within_train_step": 27.49204993247986, "step": 1036} +{"train_info/time_between_train_steps": 0.005608320236206055, "step": 1036} +{"train_info/time_between_train_steps": 14.20327377319336, "step": 1036} +{"info/global_step": 1037, "train_info/time_within_train_step": 27.334174871444702, "step": 1037} +{"train_info/time_between_train_steps": 0.005268573760986328, "step": 1037} +{"info/global_step": 1038, "train_info/time_within_train_step": 27.494675397872925, "step": 1038} +{"train_info/time_between_train_steps": 0.005143165588378906, "step": 1038} +{"info/global_step": 1039, "train_info/time_within_train_step": 27.34322714805603, "step": 1039} +{"train_info/time_between_train_steps": 0.005124330520629883, "step": 1039} +{"info/global_step": 1040, "train_info/time_within_train_step": 27.49649691581726, "step": 1040} +{"train_info/time_between_train_steps": 0.005628824234008789, "step": 1040} +{"info/global_step": 1041, "train_info/time_within_train_step": 27.37523055076599, "step": 1041} +{"train_info/time_between_train_steps": 0.00535893440246582, "step": 1041} +{"info/global_step": 1042, "train_info/time_within_train_step": 27.468430042266846, "step": 1042} +{"train_info/time_between_train_steps": 0.005272626876831055, "step": 1042} +{"info/global_step": 1043, "train_info/time_within_train_step": 27.45012664794922, "step": 1043} +{"train_info/time_between_train_steps": 0.005329132080078125, "step": 1043} +{"info/global_step": 1044, "train_info/time_within_train_step": 27.435494899749756, "step": 1044} +{"train_info/time_between_train_steps": 0.0055119991302490234, "step": 1044} +{"info/global_step": 1045, "train_info/time_within_train_step": 27.554099321365356, "step": 1045} +{"train_info/time_between_train_steps": 0.009539604187011719, "step": 1045} +{"info/global_step": 1046, "train_info/time_within_train_step": 27.5129656791687, "step": 1046} +{"train_info/time_between_train_steps": 0.0052030086517333984, "step": 1046} +{"info/global_step": 1047, "train_info/time_within_train_step": 27.487191677093506, "step": 1047} +{"train_info/time_between_train_steps": 0.005248546600341797, "step": 1047} +{"info/global_step": 1048, "train_info/time_within_train_step": 27.378591060638428, "step": 1048} +{"train_info/time_between_train_steps": 0.0050928592681884766, "step": 1048} +{"info/global_step": 1049, "train_info/time_within_train_step": 27.507894277572632, "step": 1049} +{"train_info/time_between_train_steps": 0.0052335262298583984, "step": 1049} +{"info/global_step": 1050, "train_info/time_within_train_step": 27.40785813331604, "step": 1050} +{"train_info": {"train_info/memory_allocated": 2038.609375, "train_info/memory_max_allocated": 21606.1669921875, "train_info/memory_reserved": 29668.0, "train_info/memory_max_reserved": 29668.0, "_timestamp": 1732972750, "_runtime": 29388}, "step": 1050} +{"logs": {"train/loss": 3.5206, "train/learning_rate": 8.333333333333333e-05, "train/epoch": 37.01, "_timestamp": 1732972750, "_runtime": 29388}, "step": 1050} 
+{"train_info/time_between_train_steps": 0.006922006607055664, "step": 1050} +{"info/global_step": 1051, "train_info/time_within_train_step": 27.393562078475952, "step": 1051} +{"train_info/time_between_train_steps": 0.0051381587982177734, "step": 1051} +{"info/global_step": 1052, "train_info/time_within_train_step": 27.358165979385376, "step": 1052} +{"train_info/time_between_train_steps": 0.0058574676513671875, "step": 1052} +{"info/global_step": 1053, "train_info/time_within_train_step": 27.396403789520264, "step": 1053} +{"train_info/time_between_train_steps": 0.005100727081298828, "step": 1053} +{"info/global_step": 1054, "train_info/time_within_train_step": 27.381373405456543, "step": 1054} +{"train_info/time_between_train_steps": 0.005223512649536133, "step": 1054} +{"info/global_step": 1055, "train_info/time_within_train_step": 27.344074249267578, "step": 1055} +{"train_info/time_between_train_steps": 0.00514531135559082, "step": 1055} +{"info/global_step": 1056, "train_info/time_within_train_step": 27.34882140159607, "step": 1056} +{"train_info/time_between_train_steps": 0.0051233768463134766, "step": 1056} +{"info/global_step": 1057, "train_info/time_within_train_step": 27.364644765853882, "step": 1057} +{"train_info/time_between_train_steps": 0.010180950164794922, "step": 1057} +{"info/global_step": 1058, "train_info/time_within_train_step": 27.346140384674072, "step": 1058} +{"train_info/time_between_train_steps": 0.005295515060424805, "step": 1058} +{"info/global_step": 1059, "train_info/time_within_train_step": 27.35328221321106, "step": 1059} +{"train_info/time_between_train_steps": 0.00532221794128418, "step": 1059} +{"info/global_step": 1060, "train_info/time_within_train_step": 27.362610340118408, "step": 1060} +{"train_info/time_between_train_steps": 0.005208730697631836, "step": 1060} +{"info/global_step": 1061, "train_info/time_within_train_step": 27.359782934188843, "step": 1061} +{"train_info/time_between_train_steps": 0.005307435989379883, "step": 1061} +{"info/global_step": 1062, "train_info/time_within_train_step": 27.33596634864807, "step": 1062} +{"train_info/time_between_train_steps": 0.005331277847290039, "step": 1062} +{"info/global_step": 1063, "train_info/time_within_train_step": 27.452876329421997, "step": 1063} +{"train_info/time_between_train_steps": 0.00564885139465332, "step": 1063} +{"info/global_step": 1064, "train_info/time_within_train_step": 27.386932373046875, "step": 1064} +{"train_info/time_between_train_steps": 0.011069536209106445, "step": 1064} +{"train_info/time_between_train_steps": 14.41971492767334, "step": 1064} +{"info/global_step": 1065, "train_info/time_within_train_step": 27.328201055526733, "step": 1065} +{"train_info/time_between_train_steps": 0.004988193511962891, "step": 1065} +{"info/global_step": 1066, "train_info/time_within_train_step": 27.420017957687378, "step": 1066} +{"train_info/time_between_train_steps": 0.010076761245727539, "step": 1066} +{"info/global_step": 1067, "train_info/time_within_train_step": 27.36785387992859, "step": 1067} +{"train_info/time_between_train_steps": 0.011492729187011719, "step": 1067} +{"info/global_step": 1068, "train_info/time_within_train_step": 27.42728090286255, "step": 1068} +{"train_info/time_between_train_steps": 0.004921913146972656, "step": 1068} +{"info/global_step": 1069, "train_info/time_within_train_step": 27.31073760986328, "step": 1069} +{"train_info/time_between_train_steps": 0.005063056945800781, "step": 1069} +{"info/global_step": 1070, "train_info/time_within_train_step": 
27.468422889709473, "step": 1070} +{"train_info/time_between_train_steps": 0.00545811653137207, "step": 1070} +{"info/global_step": 1071, "train_info/time_within_train_step": 27.346616506576538, "step": 1071} +{"train_info/time_between_train_steps": 0.005182743072509766, "step": 1071} +{"info/global_step": 1072, "train_info/time_within_train_step": 27.420605897903442, "step": 1072} +{"train_info/time_between_train_steps": 0.005125999450683594, "step": 1072} +{"info/global_step": 1073, "train_info/time_within_train_step": 27.38940119743347, "step": 1073} +{"train_info/time_between_train_steps": 0.004976749420166016, "step": 1073} +{"info/global_step": 1074, "train_info/time_within_train_step": 27.338644981384277, "step": 1074} +{"train_info/time_between_train_steps": 0.009589433670043945, "step": 1074} +{"info/global_step": 1075, "train_info/time_within_train_step": 27.329747915267944, "step": 1075} +{"train_info/time_between_train_steps": 0.004826784133911133, "step": 1075} +{"info/global_step": 1076, "train_info/time_within_train_step": 27.39443278312683, "step": 1076} +{"train_info/time_between_train_steps": 0.004847049713134766, "step": 1076} +{"info/global_step": 1077, "train_info/time_within_train_step": 27.32842516899109, "step": 1077} +{"train_info/time_between_train_steps": 0.004903554916381836, "step": 1077} +{"info/global_step": 1078, "train_info/time_within_train_step": 27.45998525619507, "step": 1078} +{"train_info/time_between_train_steps": 0.007922172546386719, "step": 1078} +{"info/global_step": 1079, "train_info/time_within_train_step": 27.372350692749023, "step": 1079} +{"train_info/time_between_train_steps": 0.005008697509765625, "step": 1079} +{"info/global_step": 1080, "train_info/time_within_train_step": 27.31799054145813, "step": 1080} +{"train_info/time_between_train_steps": 0.005039215087890625, "step": 1080} +{"info/global_step": 1081, "train_info/time_within_train_step": 27.336786031723022, "step": 1081} +{"train_info/time_between_train_steps": 0.0051288604736328125, "step": 1081} +{"info/global_step": 1082, "train_info/time_within_train_step": 27.33043909072876, "step": 1082} +{"train_info/time_between_train_steps": 0.005082607269287109, "step": 1082} +{"info/global_step": 1083, "train_info/time_within_train_step": 27.312042236328125, "step": 1083} +{"train_info/time_between_train_steps": 0.0051233768463134766, "step": 1083} +{"info/global_step": 1084, "train_info/time_within_train_step": 27.308419704437256, "step": 1084} +{"train_info/time_between_train_steps": 0.004979848861694336, "step": 1084} +{"info/global_step": 1085, "train_info/time_within_train_step": 27.34181237220764, "step": 1085} +{"train_info/time_between_train_steps": 0.005146980285644531, "step": 1085} +{"info/global_step": 1086, "train_info/time_within_train_step": 27.316067934036255, "step": 1086} +{"train_info/time_between_train_steps": 0.005144834518432617, "step": 1086} +{"info/global_step": 1087, "train_info/time_within_train_step": 27.318158864974976, "step": 1087} +{"train_info/time_between_train_steps": 0.004996061325073242, "step": 1087} +{"info/global_step": 1088, "train_info/time_within_train_step": 27.33331847190857, "step": 1088} +{"train_info/time_between_train_steps": 0.0055904388427734375, "step": 1088} +{"info/global_step": 1089, "train_info/time_within_train_step": 27.332658052444458, "step": 1089} +{"train_info/time_between_train_steps": 0.005067110061645508, "step": 1089} +{"info/global_step": 1090, "train_info/time_within_train_step": 27.347911834716797, "step": 1090} 
+{"train_info/time_between_train_steps": 0.005214691162109375, "step": 1090} +{"info/global_step": 1091, "train_info/time_within_train_step": 27.341490983963013, "step": 1091} +{"train_info/time_between_train_steps": 0.005360603332519531, "step": 1091} +{"info/global_step": 1092, "train_info/time_within_train_step": 27.420515537261963, "step": 1092} +{"train_info/time_between_train_steps": 0.010636568069458008, "step": 1092} +{"train_info/time_between_train_steps": 14.597501277923584, "step": 1092} +{"info/global_step": 1093, "train_info/time_within_train_step": 27.432928800582886, "step": 1093} +{"train_info/time_between_train_steps": 0.004900217056274414, "step": 1093} +{"info/global_step": 1094, "train_info/time_within_train_step": 27.43008780479431, "step": 1094} +{"train_info/time_between_train_steps": 0.005036830902099609, "step": 1094} +{"info/global_step": 1095, "train_info/time_within_train_step": 27.35098934173584, "step": 1095} +{"train_info/time_between_train_steps": 0.004834890365600586, "step": 1095} +{"info/global_step": 1096, "train_info/time_within_train_step": 27.44139575958252, "step": 1096} +{"train_info/time_between_train_steps": 0.005399942398071289, "step": 1096} +{"info/global_step": 1097, "train_info/time_within_train_step": 27.34067940711975, "step": 1097} +{"train_info/time_between_train_steps": 0.004910707473754883, "step": 1097} +{"info/global_step": 1098, "train_info/time_within_train_step": 27.452319145202637, "step": 1098} +{"train_info/time_between_train_steps": 0.0049495697021484375, "step": 1098} +{"info/global_step": 1099, "train_info/time_within_train_step": 27.345139265060425, "step": 1099} +{"train_info/time_between_train_steps": 0.004924297332763672, "step": 1099} +{"info/global_step": 1100, "train_info/time_within_train_step": 27.54792594909668, "step": 1100} +{"train_info": {"train_info/memory_allocated": 2038.609375, "train_info/memory_max_allocated": 21606.1669921875, "train_info/memory_reserved": 29668.0, "train_info/memory_max_reserved": 29668.0, "_timestamp": 1732974148, "_runtime": 30786}, "step": 1100} +{"logs": {"train/loss": 3.4829, "train/learning_rate": 5.5555555555555545e-05, "train/epoch": 39.01, "_timestamp": 1732974148, "_runtime": 30786}, "step": 1100} +{"train_info/time_between_train_steps": 3.214310646057129, "step": 1100} +{"info/global_step": 1101, "train_info/time_within_train_step": 27.3630793094635, "step": 1101} +{"train_info/time_between_train_steps": 0.0058553218841552734, "step": 1101} +{"info/global_step": 1102, "train_info/time_within_train_step": 27.336471796035767, "step": 1102} +{"train_info/time_between_train_steps": 0.004925727844238281, "step": 1102} +{"info/global_step": 1103, "train_info/time_within_train_step": 27.34246039390564, "step": 1103} +{"train_info/time_between_train_steps": 0.00968170166015625, "step": 1103} +{"info/global_step": 1104, "train_info/time_within_train_step": 27.37330174446106, "step": 1104} +{"train_info/time_between_train_steps": 0.00484013557434082, "step": 1104} +{"info/global_step": 1105, "train_info/time_within_train_step": 27.513426065444946, "step": 1105} +{"train_info/time_between_train_steps": 0.009281635284423828, "step": 1105} +{"info/global_step": 1106, "train_info/time_within_train_step": 27.31489634513855, "step": 1106} +{"train_info/time_between_train_steps": 0.004855632781982422, "step": 1106} +{"info/global_step": 1107, "train_info/time_within_train_step": 27.34264898300171, "step": 1107} +{"train_info/time_between_train_steps": 0.004857540130615234, "step": 1107} 
+{"info/global_step": 1108, "train_info/time_within_train_step": 27.375704765319824, "step": 1108} +{"train_info/time_between_train_steps": 0.009827375411987305, "step": 1108} +{"info/global_step": 1109, "train_info/time_within_train_step": 27.470547914505005, "step": 1109} +{"train_info/time_between_train_steps": 0.005303382873535156, "step": 1109} +{"info/global_step": 1110, "train_info/time_within_train_step": 27.36165165901184, "step": 1110} +{"train_info/time_between_train_steps": 0.004944562911987305, "step": 1110} +{"info/global_step": 1111, "train_info/time_within_train_step": 27.342216730117798, "step": 1111} +{"train_info/time_between_train_steps": 0.004927158355712891, "step": 1111} +{"info/global_step": 1112, "train_info/time_within_train_step": 27.338832139968872, "step": 1112} +{"train_info/time_between_train_steps": 0.004994392395019531, "step": 1112} +{"info/global_step": 1113, "train_info/time_within_train_step": 27.348243951797485, "step": 1113} +{"train_info/time_between_train_steps": 0.0049800872802734375, "step": 1113} +{"info/global_step": 1114, "train_info/time_within_train_step": 27.323084115982056, "step": 1114} +{"train_info/time_between_train_steps": 0.0051157474517822266, "step": 1114} +{"info/global_step": 1115, "train_info/time_within_train_step": 27.323593378067017, "step": 1115} +{"train_info/time_between_train_steps": 0.005102634429931641, "step": 1115} +{"info/global_step": 1116, "train_info/time_within_train_step": 27.32066559791565, "step": 1116} +{"train_info/time_between_train_steps": 0.005006551742553711, "step": 1116} +{"info/global_step": 1117, "train_info/time_within_train_step": 27.35703682899475, "step": 1117} +{"train_info/time_between_train_steps": 0.010167837142944336, "step": 1117} +{"info/global_step": 1118, "train_info/time_within_train_step": 27.34507393836975, "step": 1118} +{"train_info/time_between_train_steps": 0.006354808807373047, "step": 1118} +{"info/global_step": 1119, "train_info/time_within_train_step": 27.352386236190796, "step": 1119} +{"train_info/time_between_train_steps": 0.005726337432861328, "step": 1119} +{"info/global_step": 1120, "train_info/time_within_train_step": 27.373069763183594, "step": 1120} +{"train_info/time_between_train_steps": 0.005619525909423828, "step": 1120} +{"train_info/time_between_train_steps": 14.28618049621582, "step": 1120} +{"info/global_step": 1121, "train_info/time_within_train_step": 27.314789533615112, "step": 1121} +{"train_info/time_between_train_steps": 0.004840373992919922, "step": 1121} +{"info/global_step": 1122, "train_info/time_within_train_step": 27.44349479675293, "step": 1122} +{"train_info/time_between_train_steps": 0.004983425140380859, "step": 1122} +{"info/global_step": 1123, "train_info/time_within_train_step": 27.33321523666382, "step": 1123} +{"train_info/time_between_train_steps": 0.0050122737884521484, "step": 1123} +{"info/global_step": 1124, "train_info/time_within_train_step": 27.534753799438477, "step": 1124} +{"train_info/time_between_train_steps": 0.004911184310913086, "step": 1124} +{"info/global_step": 1125, "train_info/time_within_train_step": 27.328816413879395, "step": 1125} +{"train_info/time_between_train_steps": 0.004926443099975586, "step": 1125} +{"info/global_step": 1126, "train_info/time_within_train_step": 27.52060556411743, "step": 1126} +{"train_info/time_between_train_steps": 0.006326436996459961, "step": 1126} +{"info/global_step": 1127, "train_info/time_within_train_step": 27.36463952064514, "step": 1127} +{"train_info/time_between_train_steps": 
0.005182027816772461, "step": 1127} +{"info/global_step": 1128, "train_info/time_within_train_step": 27.45394778251648, "step": 1128} +{"train_info/time_between_train_steps": 0.005176544189453125, "step": 1128} +{"info/global_step": 1129, "train_info/time_within_train_step": 27.3543119430542, "step": 1129} +{"train_info/time_between_train_steps": 0.004994869232177734, "step": 1129} +{"info/global_step": 1130, "train_info/time_within_train_step": 27.328104257583618, "step": 1130} +{"train_info/time_between_train_steps": 0.0048274993896484375, "step": 1130} +{"info/global_step": 1131, "train_info/time_within_train_step": 27.329365491867065, "step": 1131} +{"train_info/time_between_train_steps": 0.0048503875732421875, "step": 1131} +{"info/global_step": 1132, "train_info/time_within_train_step": 27.322458744049072, "step": 1132} +{"train_info/time_between_train_steps": 0.004924774169921875, "step": 1132} +{"info/global_step": 1133, "train_info/time_within_train_step": 27.363765954971313, "step": 1133} +{"train_info/time_between_train_steps": 0.00487518310546875, "step": 1133} +{"info/global_step": 1134, "train_info/time_within_train_step": 27.34439516067505, "step": 1134} +{"train_info/time_between_train_steps": 0.0048601627349853516, "step": 1134} +{"info/global_step": 1135, "train_info/time_within_train_step": 27.40508270263672, "step": 1135} +{"train_info/time_between_train_steps": 0.00503849983215332, "step": 1135} +{"info/global_step": 1136, "train_info/time_within_train_step": 27.332573890686035, "step": 1136} +{"train_info/time_between_train_steps": 0.005071401596069336, "step": 1136} +{"info/global_step": 1137, "train_info/time_within_train_step": 27.349607467651367, "step": 1137} +{"train_info/time_between_train_steps": 0.005009889602661133, "step": 1137} +{"info/global_step": 1138, "train_info/time_within_train_step": 27.360726356506348, "step": 1138} +{"train_info/time_between_train_steps": 0.008977174758911133, "step": 1138} +{"info/global_step": 1139, "train_info/time_within_train_step": 27.421371936798096, "step": 1139} +{"train_info/time_between_train_steps": 0.004969120025634766, "step": 1139} +{"info/global_step": 1140, "train_info/time_within_train_step": 27.530261754989624, "step": 1140} +{"train_info/time_between_train_steps": 0.0049648284912109375, "step": 1140} +{"info/global_step": 1141, "train_info/time_within_train_step": 27.426126956939697, "step": 1141} +{"train_info/time_between_train_steps": 0.005277395248413086, "step": 1141} +{"info/global_step": 1142, "train_info/time_within_train_step": 27.489620447158813, "step": 1142} +{"train_info/time_between_train_steps": 0.006994009017944336, "step": 1142} +{"info/global_step": 1143, "train_info/time_within_train_step": 27.386764526367188, "step": 1143} +{"train_info/time_between_train_steps": 0.0051631927490234375, "step": 1143} +{"info/global_step": 1144, "train_info/time_within_train_step": 27.340667247772217, "step": 1144} +{"train_info/time_between_train_steps": 0.005278825759887695, "step": 1144} +{"info/global_step": 1145, "train_info/time_within_train_step": 27.350002765655518, "step": 1145} +{"train_info/time_between_train_steps": 0.005282402038574219, "step": 1145} +{"info/global_step": 1146, "train_info/time_within_train_step": 27.371057987213135, "step": 1146} +{"train_info/time_between_train_steps": 0.005334138870239258, "step": 1146} +{"info/global_step": 1147, "train_info/time_within_train_step": 27.35930037498474, "step": 1147} +{"train_info/time_between_train_steps": 0.005323886871337891, "step": 1147} 
+{"info/global_step": 1148, "train_info/time_within_train_step": 27.37929368019104, "step": 1148} +{"train_info/time_between_train_steps": 0.010528087615966797, "step": 1148} +{"train_info/time_between_train_steps": 14.452694654464722, "step": 1148} +{"info/global_step": 1149, "train_info/time_within_train_step": 27.355302572250366, "step": 1149} +{"train_info/time_between_train_steps": 0.005185604095458984, "step": 1149} +{"info/global_step": 1150, "train_info/time_within_train_step": 27.494330644607544, "step": 1150} +{"train_info": {"train_info/memory_allocated": 2038.609375, "train_info/memory_max_allocated": 21606.1669921875, "train_info/memory_reserved": 29668.0, "train_info/memory_max_reserved": 29668.0, "_timestamp": 1732975549, "_runtime": 32187}, "step": 1150} +{"logs": {"train/loss": 3.4513, "train/learning_rate": 2.7777777777777772e-05, "train/epoch": 41.0, "_timestamp": 1732975549, "_runtime": 32187}, "step": 1150} +{"train_info/time_between_train_steps": 0.007238626480102539, "step": 1150} +{"info/global_step": 1151, "train_info/time_within_train_step": 27.352154970169067, "step": 1151} +{"train_info/time_between_train_steps": 0.005337238311767578, "step": 1151} +{"info/global_step": 1152, "train_info/time_within_train_step": 27.478636980056763, "step": 1152} +{"train_info/time_between_train_steps": 0.005253791809082031, "step": 1152} +{"info/global_step": 1153, "train_info/time_within_train_step": 27.436278104782104, "step": 1153} +{"train_info/time_between_train_steps": 0.0055179595947265625, "step": 1153} +{"info/global_step": 1154, "train_info/time_within_train_step": 27.455222845077515, "step": 1154} +{"train_info/time_between_train_steps": 0.005042552947998047, "step": 1154} +{"info/global_step": 1155, "train_info/time_within_train_step": 27.45197868347168, "step": 1155} +{"train_info/time_between_train_steps": 0.005284547805786133, "step": 1155} +{"info/global_step": 1156, "train_info/time_within_train_step": 27.460033416748047, "step": 1156} +{"train_info/time_between_train_steps": 0.005225658416748047, "step": 1156} +{"info/global_step": 1157, "train_info/time_within_train_step": 27.36758518218994, "step": 1157} +{"train_info/time_between_train_steps": 0.005434989929199219, "step": 1157} +{"info/global_step": 1158, "train_info/time_within_train_step": 27.321454763412476, "step": 1158} +{"train_info/time_between_train_steps": 0.005097627639770508, "step": 1158} +{"info/global_step": 1159, "train_info/time_within_train_step": 27.32633399963379, "step": 1159} +{"train_info/time_between_train_steps": 0.005144357681274414, "step": 1159} +{"info/global_step": 1160, "train_info/time_within_train_step": 27.328033208847046, "step": 1160} +{"train_info/time_between_train_steps": 0.0049555301666259766, "step": 1160} +{"info/global_step": 1161, "train_info/time_within_train_step": 27.34916043281555, "step": 1161} +{"train_info/time_between_train_steps": 0.005037069320678711, "step": 1161} +{"info/global_step": 1162, "train_info/time_within_train_step": 27.38142418861389, "step": 1162} +{"train_info/time_between_train_steps": 0.0050203800201416016, "step": 1162} +{"info/global_step": 1163, "train_info/time_within_train_step": 27.335617780685425, "step": 1163} +{"train_info/time_between_train_steps": 0.01004338264465332, "step": 1163} +{"info/global_step": 1164, "train_info/time_within_train_step": 27.35347890853882, "step": 1164} +{"train_info/time_between_train_steps": 0.004975557327270508, "step": 1164} +{"info/global_step": 1165, "train_info/time_within_train_step": 
27.367825031280518, "step": 1165} +{"train_info/time_between_train_steps": 0.004991292953491211, "step": 1165} +{"info/global_step": 1166, "train_info/time_within_train_step": 27.3373703956604, "step": 1166} +{"train_info/time_between_train_steps": 0.009873628616333008, "step": 1166} +{"info/global_step": 1167, "train_info/time_within_train_step": 27.33922290802002, "step": 1167} +{"train_info/time_between_train_steps": 0.005026340484619141, "step": 1167} +{"info/global_step": 1168, "train_info/time_within_train_step": 27.348556756973267, "step": 1168} +{"train_info/time_between_train_steps": 0.006028413772583008, "step": 1168} +{"info/global_step": 1169, "train_info/time_within_train_step": 27.331766366958618, "step": 1169} +{"train_info/time_between_train_steps": 0.005017995834350586, "step": 1169} +{"info/global_step": 1170, "train_info/time_within_train_step": 27.330111980438232, "step": 1170} +{"train_info/time_between_train_steps": 0.007193088531494141, "step": 1170} +{"info/global_step": 1171, "train_info/time_within_train_step": 27.446027755737305, "step": 1171} +{"train_info/time_between_train_steps": 0.004984617233276367, "step": 1171} +{"info/global_step": 1172, "train_info/time_within_train_step": 27.363875150680542, "step": 1172} +{"train_info/time_between_train_steps": 0.0050754547119140625, "step": 1172} +{"info/global_step": 1173, "train_info/time_within_train_step": 27.354674339294434, "step": 1173} +{"train_info/time_between_train_steps": 0.005175113677978516, "step": 1173} +{"info/global_step": 1174, "train_info/time_within_train_step": 27.378669023513794, "step": 1174} +{"train_info/time_between_train_steps": 0.008530616760253906, "step": 1174} +{"info/global_step": 1175, "train_info/time_within_train_step": 27.38292384147644, "step": 1175} +{"train_info/time_between_train_steps": 0.005762577056884766, "step": 1175} +{"info/global_step": 1176, "train_info/time_within_train_step": 27.386634826660156, "step": 1176} +{"train_info/time_between_train_steps": 0.010721445083618164, "step": 1176} +{"train_info/time_between_train_steps": 14.688899278640747, "step": 1176} +{"info/global_step": 1177, "train_info/time_within_train_step": 27.377665758132935, "step": 1177} +{"train_info/time_between_train_steps": 0.005727052688598633, "step": 1177} +{"info/global_step": 1178, "train_info/time_within_train_step": 27.515231132507324, "step": 1178} +{"train_info/time_between_train_steps": 0.005267620086669922, "step": 1178} +{"info/global_step": 1179, "train_info/time_within_train_step": 27.36989116668701, "step": 1179} +{"train_info/time_between_train_steps": 0.005185842514038086, "step": 1179} +{"info/global_step": 1180, "train_info/time_within_train_step": 27.473973751068115, "step": 1180} +{"train_info/time_between_train_steps": 0.0052759647369384766, "step": 1180} +{"info/global_step": 1181, "train_info/time_within_train_step": 27.368918657302856, "step": 1181} +{"train_info/time_between_train_steps": 0.005336284637451172, "step": 1181} +{"info/global_step": 1182, "train_info/time_within_train_step": 27.473164319992065, "step": 1182} +{"train_info/time_between_train_steps": 0.005350828170776367, "step": 1182} +{"info/global_step": 1183, "train_info/time_within_train_step": 27.35528540611267, "step": 1183} +{"train_info/time_between_train_steps": 0.0051119327545166016, "step": 1183} +{"info/global_step": 1184, "train_info/time_within_train_step": 27.45693016052246, "step": 1184} +{"train_info/time_between_train_steps": 0.005271196365356445, "step": 1184} +{"info/global_step": 1185, 
"train_info/time_within_train_step": 27.41675090789795, "step": 1185} +{"train_info/time_between_train_steps": 0.005214691162109375, "step": 1185} +{"info/global_step": 1186, "train_info/time_within_train_step": 27.58934497833252, "step": 1186} +{"train_info/time_between_train_steps": 0.012001991271972656, "step": 1186} +{"info/global_step": 1187, "train_info/time_within_train_step": 27.63501286506653, "step": 1187} +{"train_info/time_between_train_steps": 0.004859447479248047, "step": 1187} +{"info/global_step": 1188, "train_info/time_within_train_step": 27.59418797492981, "step": 1188} +{"train_info/time_between_train_steps": 0.010236501693725586, "step": 1188} +{"info/global_step": 1189, "train_info/time_within_train_step": 27.35122299194336, "step": 1189} +{"train_info/time_between_train_steps": 0.010181188583374023, "step": 1189} +{"info/global_step": 1190, "train_info/time_within_train_step": 27.3438458442688, "step": 1190} +{"train_info/time_between_train_steps": 0.004967212677001953, "step": 1190} +{"info/global_step": 1191, "train_info/time_within_train_step": 27.355674028396606, "step": 1191} +{"train_info/time_between_train_steps": 0.005098819732666016, "step": 1191} +{"info/global_step": 1192, "train_info/time_within_train_step": 27.379833698272705, "step": 1192} +{"train_info/time_between_train_steps": 0.0049686431884765625, "step": 1192} +{"info/global_step": 1193, "train_info/time_within_train_step": 27.363431692123413, "step": 1193} +{"train_info/time_between_train_steps": 0.005075693130493164, "step": 1193} +{"info/global_step": 1194, "train_info/time_within_train_step": 27.34337830543518, "step": 1194} +{"train_info/time_between_train_steps": 0.00572514533996582, "step": 1194} +{"info/global_step": 1195, "train_info/time_within_train_step": 27.349745512008667, "step": 1195} +{"train_info/time_between_train_steps": 0.005036354064941406, "step": 1195} +{"info/global_step": 1196, "train_info/time_within_train_step": 27.51261281967163, "step": 1196} +{"train_info/time_between_train_steps": 0.004941463470458984, "step": 1196} +{"info/global_step": 1197, "train_info/time_within_train_step": 27.613045930862427, "step": 1197} +{"train_info/time_between_train_steps": 0.013460874557495117, "step": 1197} +{"info/global_step": 1198, "train_info/time_within_train_step": 27.750579118728638, "step": 1198} +{"train_info/time_between_train_steps": 0.005246162414550781, "step": 1198} +{"info/global_step": 1199, "train_info/time_within_train_step": 27.664106369018555, "step": 1199} +{"train_info/time_between_train_steps": 0.011176586151123047, "step": 1199} +{"info/global_step": 1200, "train_info/time_within_train_step": 27.684306144714355, "step": 1200} +{"train_info": {"train_info/memory_allocated": 2038.609375, "train_info/memory_max_allocated": 21606.1669921875, "train_info/memory_reserved": 29668.0, "train_info/memory_max_reserved": 29668.0, "_timestamp": 1732976935, "_runtime": 33573}, "step": 1200} +{"logs": {"train/loss": 3.3996, "train/learning_rate": 0.0, "train/epoch": 42.02, "_timestamp": 1732976935, "_runtime": 33573}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 2038.609375, "train_info/memory_max_allocated": 21606.1669921875, "train_info/memory_reserved": 29668.0, "train_info/memory_max_reserved": 29668.0, "_timestamp": 1732976940, "_runtime": 33578}, "step": 1200} +{"logs": {"train/train_runtime": 33578.6942, "train/train_samples_per_second": 18.297, "train/train_steps_per_second": 0.036, "train/total_flos": 3.25662891835392e+17, "train/train_loss": 
4.488721703688304, "train/epoch": 42.02, "_timestamp": 1732976940, "_runtime": 33578}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 2038.6083984375, "train_info/memory_max_allocated": 21606.1669921875, "train_info/memory_reserved": 29668.0, "train_info/memory_max_reserved": 29668.0, "_timestamp": 1732976943, "_runtime": 33581}, "step": 1200} +{"logs": {"eval/loss": 4.422486305236816, "eval/runtime": 1.7865, "eval/samples_per_second": 52.058, "eval/steps_per_second": 3.359, "train/epoch": 42.02, "_timestamp": 1732976943, "_runtime": 33581}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 2038.6083984375, "train_info/memory_max_allocated": 21606.1669921875, "train_info/memory_reserved": 29668.0, "train_info/memory_max_reserved": 29668.0, "_timestamp": 1732976943, "_runtime": 33581}, "step": 1200} +{"logs": {"eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_loss": 4.422486305236816, "eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_ppl": 83.30314513950648, "eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_runtime": 1.7865, "eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_samples_per_second": 52.058, "train/epoch": 42.02, "_timestamp": 1732976943, "_runtime": 33581}, "step": 1200} diff --git a/pytorch_model.bin b/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..9a4f7619a791f31eca86b197752db03b0e3e9356 --- /dev/null +++ b/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f345259117a274fb2b1f9c61c5d386fa21d63ba4029129bae3eb685e538f3c8 +size 540327017 diff --git a/shuffle_control_ro_RO_randinit_seed53.log b/shuffle_control_ro_RO_randinit_seed53.log new file mode 100755 index 0000000000000000000000000000000000000000..3f5f4cb63a71eb78b512cf7e1fffde6218f7efdc --- /dev/null +++ b/shuffle_control_ro_RO_randinit_seed53.log @@ -0,0 +1,231 @@ +|=>> 11/30 [00:40:28] - mistral - INFO :: Starting Run: shuffle_control_ro_RO_randinit_seed53... +|=>> 11/30 [00:40:28] - mistral - INFO :: Setting Random Seed to 53! +|=>> 11/30 [00:40:28] - mistral - INFO :: Building Tokenize and Initializing `gpt2-small` via AutoModel/AutoConfig... +|=>> 11/30 [00:40:28] - mistral - INFO :: Using Configs For Model From: /local/xiulyang/mission-impossible-language-models/mistral/conf/models/gpt2-small-60000.json ... +|=>> 11/30 [00:40:28] - mistral.models.auto - INFO :: Building Hugging Face GPT2Config from provided configs: {'activation_function': 'gelu_new', 'architectures': ['GPT2LMHeadModel'], 'attn_pdrop': 0.1, 'embd_pdrop': 0.1, 'eos_token_id': 0, 'bos_token_id': 0, 'initializer_range': 0.02, 'layer_norm_epsilon': 1e-05, 'model_type': 'gpt2', 'n_ctx': 1024, 'n_embd': 768, 'n_head': 12, 'n_inner': None, 'n_layer': 12, 'n_positions': 1024, 'reorder_and_upcast_attn': True, 'resid_pdrop': 0.1, 'scale_attn_by_inverse_layer_idx': True, 'scale_attn_weights': True, 'summary_activation': None, 'summary_first_dropout': 0.2, 'summary_proj_to_labels': True, 'summary_type': 'cls_index', 'summary_use_proj': True, 'task_specific_params': {'text-generation': {'do_sample': True, 'max_length': 1024}}, 'torch_dtype': 'float32', 'transformers_version': '4.35.2', 'use_cache': False, 'vocab_size': 60000} ... +|=>> 11/30 [00:40:28] - mistral.models.auto - INFO :: Fetching Hugging Face [Fast] AutoTokenizer for Model: `gpt2`... 
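The eval record above logs eval/loss = 4.422486305236816 alongside a per-dataset perplexity of 83.30314513950648; the perplexity is simply the exponential of the mean cross-entropy loss. A minimal check in Python, using the values copied from the log:

    import math

    eval_loss = 4.422486305236816   # eval/loss from the record above
    ppl = math.exp(eval_loss)       # perplexity = exp(mean cross-entropy)
    print(ppl)                      # ~83.303, matching the logged *_ppl value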
+|=>> 11/30 [00:40:28] - mistral.models.auto - INFO :: Using a Pretokenized Dataset +|=>> 11/30 [00:40:28] - mistral.models.auto - INFO :: Initializing Custom GPT-2 Model from Configuration: `gpt2`... +|=>> 11/30 [00:40:32] - mistral - INFO :: Setting Training Arguments from Quinfig... +|=>> 11/30 [00:40:32] - mistral.args.training - INFO :: Setting Gradient Accumulation Steps = `64` [BSZ: 512 World Size: 1 Device BSZ: 8] +|=>> 11/30 [00:40:32] - mistral - INFO :: Downloading and Preprocessing Dataset `/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py`... +|=>> 11/30 [00:40:32] - datasets_modules.datasets.babylm_dataset.448f153d0e278a0d2780d5efab0189862cd1263a34c2cdd5ba88610068970183.babylm_dataset - INFO :: Generating examples from = /local/xiulyang/mission-impossible-language-models/data/multilingual/multilingual_data_perturbed/shuffle_control_ro/train +|=>> 11/30 [00:40:32] - datasets_modules.datasets.babylm_dataset.448f153d0e278a0d2780d5efab0189862cd1263a34c2cdd5ba88610068970183.babylm_dataset - INFO :: Total sentences: 1028659 +|=>> 11/30 [00:40:33] - datasets_modules.datasets.babylm_dataset.448f153d0e278a0d2780d5efab0189862cd1263a34c2cdd5ba88610068970183.babylm_dataset - INFO :: Loading pre-tokenized data +|=>> 11/30 [00:40:38] - datasets_modules.datasets.babylm_dataset.448f153d0e278a0d2780d5efab0189862cd1263a34c2cdd5ba88610068970183.babylm_dataset - INFO :: Concatenating tokenized data using EOS token +|=>> 11/30 [00:40:38] - datasets_modules.datasets.babylm_dataset.448f153d0e278a0d2780d5efab0189862cd1263a34c2cdd5ba88610068970183.babylm_dataset - INFO :: Chunking tokens into sublists of 1024 +|=>> 11/30 [00:40:39] - datasets_modules.datasets.babylm_dataset.448f153d0e278a0d2780d5efab0189862cd1263a34c2cdd5ba88610068970183.babylm_dataset - INFO :: Writing dataset as space-separated sequences of tokens +|=>> 11/30 [00:40:42] - datasets_modules.datasets.babylm_dataset.448f153d0e278a0d2780d5efab0189862cd1263a34c2cdd5ba88610068970183.babylm_dataset - INFO :: Generating examples from = /local/xiulyang/mission-impossible-language-models/data/multilingual/multilingual_data_perturbed/shuffle_control_ro/dev +|=>> 11/30 [00:40:42] - datasets_modules.datasets.babylm_dataset.448f153d0e278a0d2780d5efab0189862cd1263a34c2cdd5ba88610068970183.babylm_dataset - INFO :: Total sentences: 5121 +|=>> 11/30 [00:40:42] - datasets_modules.datasets.babylm_dataset.448f153d0e278a0d2780d5efab0189862cd1263a34c2cdd5ba88610068970183.babylm_dataset - INFO :: Loading pre-tokenized data +|=>> 11/30 [00:40:42] - datasets_modules.datasets.babylm_dataset.448f153d0e278a0d2780d5efab0189862cd1263a34c2cdd5ba88610068970183.babylm_dataset - INFO :: Concatenating tokenized data using EOS token +|=>> 11/30 [00:40:42] - datasets_modules.datasets.babylm_dataset.448f153d0e278a0d2780d5efab0189862cd1263a34c2cdd5ba88610068970183.babylm_dataset - INFO :: Chunking tokens into sublists of 1024 +|=>> 11/30 [00:40:42] - datasets_modules.datasets.babylm_dataset.448f153d0e278a0d2780d5efab0189862cd1263a34c2cdd5ba88610068970183.babylm_dataset - INFO :: Writing dataset as space-separated sequences of tokens +|=>> 11/30 [00:40:42] - mistral.corpora.auto - INFO :: Building Tokenized Indexed Dataset for {dataset_id}/{dataset_name}... +|=>> 11/30 [00:40:42] - mistral.corpora.auto - INFO :: Building Indexed Dataset for train +|=>> 11/30 [00:41:12] - mistral.corpora.auto - INFO :: Building Indexed Dataset for validation +|=>> 11/30 [00:41:12] - mistral - INFO :: Initializing Model Trainer... 
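The preprocessing steps logged above (load pre-tokenized sentences, concatenate them with the EOS token, chunk the stream into sublists of 1024, write each chunk as a space-separated sequence of token ids) can be sketched as below. This is a hedged reconstruction from the log messages only, not the actual babylm_dataset.py code; the function name is made up, EOS id 0 is taken from config.json, and dropping any trailing partial block is an assumption.

    # Rough sketch of the logged preprocessing; not the original script.
    EOS_ID = 0          # eos_token_id in config.json
    BLOCK_SIZE = 1024   # "Chunking tokens into sublists of 1024"

    def write_chunked(pretokenized_sentences, out_path):
        """pretokenized_sentences: iterable of lists of token ids."""
        stream = []
        for sent in pretokenized_sentences:
            stream.extend(sent)
            stream.append(EOS_ID)  # "Concatenating tokenized data using EOS token"
        with open(out_path, "w") as f:
            # fixed-size blocks; a trailing partial block is dropped (assumption)
            for i in range(0, len(stream) - BLOCK_SIZE + 1, BLOCK_SIZE):
                block = stream[i:i + BLOCK_SIZE]
                # "Writing dataset as space-separated sequences of tokens"
                f.write(" ".join(map(str, block)) + "\n")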
+|=>> 11/30 [00:41:12] - mistral - INFO :: Training Arguments: TrainingArguments( +_n_gpu=1, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +bf16=False, +bf16_full_eval=False, +data_seed=53, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=IntervalStrategy.STEPS, +fp16=True, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +gradient_accumulation_steps=64, +gradient_checkpointing=False, +greater_is_better=None, +group_by_length=False, +half_precision_backend=auto, +hub_model_id=None, +hub_strategy=HubStrategy.EVERY_SAVE, +hub_token=, +ignore_data_skip=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0006, +length_column_name=length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=logs, +logging_first_step=True, +logging_nan_inf_filter=True, +logging_steps=50, +logging_strategy=IntervalStrategy.STEPS, +lr_scheduler_type=SchedulerType.LINEAR, +max_grad_norm=1.0, +max_steps=1200, +metric_for_best_model=None, +mp_parameters=, +no_cuda=False, +num_train_epochs=3.0, +optim=OptimizerNames.ADAMW_HF, +output_dir=//local/xiulyang/babylm_models/shuffle_control_ro_RO_randinit/babylm_shuffle_control_ro_RO_randinit_seed53/runs/shuffle_control_ro_RO_randinit_seed53, +overwrite_output_dir=False, +past_index=-1, +per_device_eval_batch_size=16, +per_device_train_batch_size=8, +prediction_loss_only=True, +push_to_hub=False, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +remove_unused_columns=True, +report_to=[], +resume_from_checkpoint=None, +run_name=shuffle_control_ro_RO_randinit_seed53, +save_on_each_node=False, +save_steps=1000, +save_strategy=IntervalStrategy.STEPS, +save_total_limit=None, +seed=53, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=120, +weight_decay=0.1, +xpu_backend=None, +) +|=>> 11/30 [00:41:13] - mistral.core.callbacks - INFO :: Setting W&B Project: xiulin-yang-compling +|=>> 11/30 [00:41:17] - mistral - INFO :: Training... +|=>> 11/30 [00:41:17] - mistral.core.callbacks - INFO :: Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" +|=>> 11/30 [06:09:13] - mistral - INFO :: Starting Run: shuffle_control_ro_RO_randinit_seed53... +|=>> 11/30 [06:09:13] - mistral - INFO :: Setting Random Seed to 53! +|=>> 11/30 [06:09:13] - mistral - INFO :: Building Tokenize and Initializing `gpt2-small` via AutoModel/AutoConfig... +|=>> 11/30 [06:09:13] - mistral - INFO :: Using Configs For Model From: /local/xiulyang/mission-impossible-language-models/mistral/conf/models/gpt2-small-60000.json ... 
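The TrainingArguments dump above uses lr_scheduler_type=LINEAR with learning_rate=0.0006, warmup_steps=120 and max_steps=1200, i.e. a linear ramp to 6e-4 over the first 120 optimizer steps followed by a linear decay to 0 at step 1200, which is consistent with train/learning_rate = 0.0 at step 1200 in the metrics log. A small sketch of that schedule, assuming the standard linear-with-warmup rule:

    # Linear warmup followed by linear decay to zero.
    BASE_LR, WARMUP, MAX_STEPS = 6e-4, 120, 1200

    def lr_at(step: int) -> float:
        if step < WARMUP:
            return BASE_LR * step / WARMUP
        return BASE_LR * max(0.0, (MAX_STEPS - step) / (MAX_STEPS - WARMUP))

    print(lr_at(120))    # peak: 0.0006
    print(lr_at(1200))   # 0.0, matching the final logged learning rate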
+|=>> 11/30 [06:09:13] - mistral.models.auto - INFO :: Building Hugging Face GPT2Config from provided configs: {'activation_function': 'gelu_new', 'architectures': ['GPT2LMHeadModel'], 'attn_pdrop': 0.1, 'embd_pdrop': 0.1, 'eos_token_id': 0, 'bos_token_id': 0, 'initializer_range': 0.02, 'layer_norm_epsilon': 1e-05, 'model_type': 'gpt2', 'n_ctx': 1024, 'n_embd': 768, 'n_head': 12, 'n_inner': None, 'n_layer': 12, 'n_positions': 1024, 'reorder_and_upcast_attn': True, 'resid_pdrop': 0.1, 'scale_attn_by_inverse_layer_idx': True, 'scale_attn_weights': True, 'summary_activation': None, 'summary_first_dropout': 0.2, 'summary_proj_to_labels': True, 'summary_type': 'cls_index', 'summary_use_proj': True, 'task_specific_params': {'text-generation': {'do_sample': True, 'max_length': 1024}}, 'torch_dtype': 'float32', 'transformers_version': '4.35.2', 'use_cache': False, 'vocab_size': 60000} ... +|=>> 11/30 [06:09:13] - mistral.models.auto - INFO :: Fetching Hugging Face [Fast] AutoTokenizer for Model: `gpt2`... +|=>> 11/30 [06:09:13] - mistral.models.auto - INFO :: Using a Pretokenized Dataset +|=>> 11/30 [06:09:13] - mistral.models.auto - INFO :: Initializing Custom GPT-2 Model from Configuration: `gpt2`... +|=>> 11/30 [06:09:17] - mistral - INFO :: Setting Training Arguments from Quinfig... +|=>> 11/30 [06:09:17] - mistral.args.training - INFO :: Setting Gradient Accumulation Steps = `64` [BSZ: 512 World Size: 1 Device BSZ: 8] +|=>> 11/30 [06:09:17] - mistral - INFO :: Downloading and Preprocessing Dataset `/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py`... +|=>> 11/30 [06:09:17] - datasets.builder - WARNING :: Reusing dataset baby_lm_corpus (//local/xiulyang/babylm_models/shuffle_control_ro_RO_randinit/babylm_shuffle_control_ro_RO_randinit_seed53/artifacts/datasets/baby_lm_corpus/shuffle_control_ro_RO_seed53/0.0.0/448f153d0e278a0d2780d5efab0189862cd1263a34c2cdd5ba88610068970183) +|=>> 11/30 [06:09:17] - mistral.corpora.auto - INFO :: Building Tokenized Indexed Dataset for {dataset_id}/{dataset_name}... +|=>> 11/30 [06:09:17] - mistral.corpora.auto - INFO :: Building Indexed Dataset for train +|=>> 11/30 [06:09:17] - mistral.corpora.indexer - INFO :: Found existing indexed dataset at //local/xiulyang/babylm_models/shuffle_control_ro_RO_randinit/babylm_shuffle_control_ro_RO_randinit_seed53/artifacts/gpt2-processed/shuffle_control_ro_RO_seed53-/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py/preprocessing/tokenization/train-tokenized +|=>> 11/30 [06:09:17] - mistral.corpora.auto - INFO :: Building Indexed Dataset for validation +|=>> 11/30 [06:09:17] - mistral.corpora.indexer - INFO :: Found existing indexed dataset at //local/xiulyang/babylm_models/shuffle_control_ro_RO_randinit/babylm_shuffle_control_ro_RO_randinit_seed53/artifacts/gpt2-processed/shuffle_control_ro_RO_seed53-/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py/preprocessing/tokenization/validation-tokenized +|=>> 11/30 [06:09:17] - mistral - INFO :: Initializing Model Trainer... 
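The gradient-accumulation line above (accumulation = 64 for BSZ 512 with world size 1 and device BSZ 8) implies 512 sequences per optimizer step, which also matches the throughput reported at the end of training (train_runtime ≈ 33578.7 s for 1200 steps). A quick cross-check:

    # Effective batch size and throughput, cross-checked against the logged values.
    per_device_bsz, world_size, grad_accum = 8, 1, 64
    effective_bsz = per_device_bsz * world_size * grad_accum   # 512

    steps, runtime_s = 1200, 33578.6942                        # train/train_runtime
    steps_per_s = steps / runtime_s                            # ~0.036  (logged: 0.036)
    samples_per_s = steps_per_s * effective_bsz                # ~18.30  (logged: 18.297)
    print(effective_bsz, round(steps_per_s, 3), round(samples_per_s, 3))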
+|=>> 11/30 [06:09:17] - mistral - INFO :: Training Arguments: TrainingArguments( +_n_gpu=1, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +bf16=False, +bf16_full_eval=False, +data_seed=53, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=IntervalStrategy.STEPS, +fp16=True, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +gradient_accumulation_steps=64, +gradient_checkpointing=False, +greater_is_better=None, +group_by_length=False, +half_precision_backend=auto, +hub_model_id=None, +hub_strategy=HubStrategy.EVERY_SAVE, +hub_token=, +ignore_data_skip=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0006, +length_column_name=length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=logs, +logging_first_step=True, +logging_nan_inf_filter=True, +logging_steps=50, +logging_strategy=IntervalStrategy.STEPS, +lr_scheduler_type=SchedulerType.LINEAR, +max_grad_norm=1.0, +max_steps=1200, +metric_for_best_model=None, +mp_parameters=, +no_cuda=False, +num_train_epochs=3.0, +optim=OptimizerNames.ADAMW_HF, +output_dir=//local/xiulyang/babylm_models/shuffle_control_ro_RO_randinit/babylm_shuffle_control_ro_RO_randinit_seed53/runs/shuffle_control_ro_RO_randinit_seed53, +overwrite_output_dir=False, +past_index=-1, +per_device_eval_batch_size=16, +per_device_train_batch_size=8, +prediction_loss_only=True, +push_to_hub=False, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +remove_unused_columns=True, +report_to=[], +resume_from_checkpoint=None, +run_name=shuffle_control_ro_RO_randinit_seed53, +save_on_each_node=False, +save_steps=1000, +save_strategy=IntervalStrategy.STEPS, +save_total_limit=None, +seed=53, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=120, +weight_decay=0.1, +xpu_backend=None, +) +|=>> 11/30 [06:09:17] - mistral.core.callbacks - INFO :: Setting W&B Project: xiulin-yang-compling +|=>> 11/30 [06:09:21] - mistral - INFO :: Training... +|=>> 11/30 [06:09:21] - mistral.core.callbacks - INFO :: Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" +|=>> 11/30 [15:29:01] - mistral - INFO :: ...and that's all folks! +|=>> 11/30 [15:29:01] - mistral - INFO :: Running final evaluation... 
diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..938bd31b67d4eb91525b5f5b946519126a3331c7 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5ee64ec673c5915c725d59ab09d5567c896e983bbaa61ee563d03df6350d2f +size 3183
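Note that tokenizer_config.json only names a custom PassthroughTokenizer and special_tokens_map.json is empty, so AutoTokenizer cannot build a usable tokenizer from these files alone; the training data was already space-separated token ids. A hedged sketch for loading just the weights (the local path and the example ids are placeholders):

    # Load the GPT-2 weights directly; inputs must be supplied as pre-tokenized ids.
    import torch
    from transformers import GPT2LMHeadModel

    model = GPT2LMHeadModel.from_pretrained("path/to/this/repo")  # placeholder path
    ids = torch.tensor([[17, 204, 5, 0]])                         # placeholder ids; 0 = EOS
    with torch.no_grad():
        out = model(input_ids=ids)
    print(out.logits.shape)                                       # torch.Size([1, 4, 60000])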