diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/checkpoint-0/config.json b/checkpoint-0/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-0/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-0/pytorch_model.bin b/checkpoint-0/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..e62f1c2a73e204dd71fd38dbd4de66ee58a3ae92 --- /dev/null +++ b/checkpoint-0/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4794ea0b9a495e87fc34e316efe1b585527d6e3d15e4d093362ab24ee74ab3f +size 510396521 diff --git a/checkpoint-0/special_tokens_map.json b/checkpoint-0/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-0/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-0/tokenizer_config.json b/checkpoint-0/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-0/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-0/training_args.bin b/checkpoint-0/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..cd0f8832699caf9cedfc54e6ee63cf2d7599438b --- /dev/null +++ b/checkpoint-0/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:707cc1b4962335ce6a2833d97d3275a2adb3ffdc23fb0d423b7d2cbc9c573922 +size 3183 diff --git a/checkpoint-100/config.json b/checkpoint-100/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-100/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..e70a45ac8ad6293958779b2e650eee3620ade084 --- /dev/null +++ b/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e08e079782e61659bc8038b738ba74e82555eba288df8e8095d4b7684c2fb85 +size 995603825 diff --git a/checkpoint-100/pytorch_model.bin b/checkpoint-100/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..8468a059d245b199162ac4bfe060fbf34097cb45 --- /dev/null +++ b/checkpoint-100/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98b1be20b4fb85ff53b28eda569b43f7d7f1176c9d268512a753cda4db40b5a6 +size 510396521 diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..c06a15ee12a8e5eeaf5a07f716c4e63a8c557254 --- /dev/null +++ b/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1115dcfef92d99268f39b5d17876134a24d1445b954c80839a3f7d599882cb1e +size 14567 diff --git a/checkpoint-100/scaler.pt b/checkpoint-100/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..45cc4a33e17645cb0ed4a911b11c77cb2e7ce7f3 --- /dev/null +++ b/checkpoint-100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13a3423b2fe42f204bc8fe2c666ff379f9fd753a0f13613064a5e71e86b519e8 +size 559 diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..595eec9094e91b3eb24c2de88c461df2c22026ab --- /dev/null +++ b/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e9b9d31d11c624d89b0c04ad496adf4b5addd3e703848d2583972c703e8da6 +size 623 diff --git a/checkpoint-100/special_tokens_map.json b/checkpoint-100/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-100/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-100/tokenizer_config.json b/checkpoint-100/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-100/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..13f74188df3aadbdf49b8b25ab81645d238103c2 --- /dev/null +++ b/checkpoint-100/trainer_state.json @@ -0,0 +1,34 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0208333333333335, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.9503, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.4039, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.9664, + "step": 100 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.705417699328e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..cd0f8832699caf9cedfc54e6ee63cf2d7599438b --- /dev/null +++ b/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:707cc1b4962335ce6a2833d97d3275a2adb3ffdc23fb0d423b7d2cbc9c573922 +size 3183 diff --git a/checkpoint-1000/config.json b/checkpoint-1000/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-1000/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-1000/optimizer.pt b/checkpoint-1000/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..2c2c20d94c2667d56c78b42ea9c32eeb1a27c881 --- /dev/null +++ b/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c944b41ad3f05ad0c74cbb582e217424b13b6d6d72fd73fb593d0728e9d75835 +size 995604017 diff --git a/checkpoint-1000/pytorch_model.bin b/checkpoint-1000/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..281f3ec88f4f0766516aaa6789a50b48aebf0e96 --- /dev/null +++ b/checkpoint-1000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef9549501f7ff52940a4741de5930333eb1c242117721e03f36de1062971b788 +size 510396521 diff --git a/checkpoint-1000/rng_state.pth b/checkpoint-1000/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..667a03e92e550e3d2c22b12c5ed2fd90fa186ee7 --- /dev/null +++ b/checkpoint-1000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8649579cfb3a505567afcd59db63c6a9fdd660ba52d4dc5786c21795dd39b68a +size 14567 diff --git a/checkpoint-1000/scaler.pt b/checkpoint-1000/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..b04695d3a30e4bab2b78883d9c849c25c37ef7d7 --- /dev/null +++ b/checkpoint-1000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f810fc7b695697c440d8985f6042b4ba23a9e1027604c265718b518ca29f1b2b +size 559 diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..2a616899591a36b66ac0bd1ceeb324087c181a2a --- /dev/null +++ b/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0691206f4bd9ca409d6e7104087a4e0eb05df8f8f555a400f6ecc532edba52d8 +size 623 diff --git a/checkpoint-1000/special_tokens_map.json b/checkpoint-1000/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1000/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1000/tokenizer_config.json b/checkpoint-1000/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1000/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..b60c322aa4c8a078701783033ea3a03672eb7290 --- /dev/null +++ b/checkpoint-1000/trainer_state.json @@ -0,0 +1,158 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 39.020833333333336, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.9503, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.4039, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.9664, + "step": 100 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005833333333333333, + "loss": 6.5622, + "step": 150 + }, + { + "epoch": 7.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.0975, + "step": 200 + }, + { + "epoch": 9.02, + "learning_rate": 0.0005277777777777777, + "loss": 5.7317, + "step": 250 + }, + { + "epoch": 11.02, + "learning_rate": 0.0005, + "loss": 5.4526, + "step": 300 + }, + { + "epoch": 13.02, + "learning_rate": 0.00047222222222222224, + "loss": 5.246, + "step": 350 + }, + { + "epoch": 15.02, + "learning_rate": 0.00044444444444444436, + "loss": 5.0966, + "step": 400 + }, + { + "epoch": 17.02, + "learning_rate": 0.00041666666666666664, + "loss": 4.9523, + "step": 450 + }, + { + "epoch": 19.02, + "learning_rate": 0.00038888888888888887, + "loss": 4.7992, + "step": 500 + }, + { + "epoch": 21.02, + "learning_rate": 0.0003611111111111111, + "loss": 4.6133, + "step": 550 + }, + { + "epoch": 23.02, + "learning_rate": 0.0003333333333333333, + "loss": 4.438, + "step": 600 + }, + { + "epoch": 25.02, + "learning_rate": 0.00030555555555555555, + "loss": 4.2932, + "step": 650 + }, + { + "epoch": 27.02, + "learning_rate": 0.0002777777777777778, + "loss": 4.1681, + "step": 700 + }, + { + "epoch": 29.02, + "learning_rate": 0.00025, + "loss": 4.0568, + "step": 750 + }, + { + "epoch": 31.02, + "learning_rate": 0.00022222222222222218, + "loss": 3.9603, + "step": 800 + }, + { + "epoch": 33.02, + "learning_rate": 0.00019444444444444443, + "loss": 3.8758, + "step": 850 + }, + { + "epoch": 35.02, + "learning_rate": 0.00016666666666666666, + "loss": 3.8026, + "step": 900 + }, + { + "epoch": 37.02, + "learning_rate": 0.0001388888888888889, + "loss": 3.7385, + "step": 950 + }, + { + "epoch": 39.02, + "learning_rate": 0.00011111111111111109, + "loss": 3.6821, + "step": 1000 + }, + { + "epoch": 39.02, + "eval_loss": 4.709035396575928, + "eval_runtime": 5.2665, + "eval_samples_per_second": 18.038, + "eval_steps_per_second": 1.139, + "step": 1000 + }, + { + "epoch": 39.02, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 4.709035396575928, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 110.94509026017886, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 5.2665, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 18.038, + "step": 1000 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.7143538868224e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..cd0f8832699caf9cedfc54e6ee63cf2d7599438b --- /dev/null +++ b/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:707cc1b4962335ce6a2833d97d3275a2adb3ffdc23fb0d423b7d2cbc9c573922 +size 3183 diff --git a/checkpoint-1100/config.json b/checkpoint-1100/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-1100/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-1100/optimizer.pt b/checkpoint-1100/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..b2620b76583944ed42ff8f5e764f171e9212a718 --- /dev/null +++ b/checkpoint-1100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be9686075beae92465241411eac3e6daf4d27f5ae24d0ef5ce6c2b263cb6f570 +size 995604017 diff --git a/checkpoint-1100/pytorch_model.bin b/checkpoint-1100/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..3a2a98da0b0af4770c265dd3ee2923a141089a6a --- /dev/null +++ b/checkpoint-1100/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d6c4f77902f674ea32b6c5c5bd24674bcb651f0b5135d63617fa7a412de2f85 +size 510396521 diff --git a/checkpoint-1100/rng_state.pth b/checkpoint-1100/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..9e23d9d4081dbe4dc7a68be6179fea01a6ef23f2 --- /dev/null +++ b/checkpoint-1100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85ca13a73b623d994505a11b8e434a5765da806efbbb6f15bff6729c94a33a1a +size 14567 diff --git a/checkpoint-1100/scaler.pt b/checkpoint-1100/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..37bf049fbd5fd721203bf0238edc8ff67dbd8f94 --- /dev/null +++ b/checkpoint-1100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fb16c30b686aa43e110b0d33f9d46bf3127b7124542ca8dc34831233d4675a0 +size 559 diff --git a/checkpoint-1100/scheduler.pt b/checkpoint-1100/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..c158169ec0bb09620952d544fd4b4edea0cc9cf4 --- /dev/null +++ b/checkpoint-1100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f1b7713e4bb40428f29080b7d08d4a52f779ac863737861e4724292b2cf6c59 +size 623 diff --git a/checkpoint-1100/special_tokens_map.json b/checkpoint-1100/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1100/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1100/tokenizer_config.json b/checkpoint-1100/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1100/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1100/trainer_state.json b/checkpoint-1100/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..726dc194b933445687eb80ea4dc34be4283da16b --- /dev/null +++ b/checkpoint-1100/trainer_state.json @@ -0,0 +1,170 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 43.020833333333336, + "global_step": 1100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.9503, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.4039, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.9664, + "step": 100 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005833333333333333, + "loss": 6.5622, + "step": 150 + }, + { + "epoch": 7.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.0975, + "step": 200 + }, + { + "epoch": 9.02, + "learning_rate": 0.0005277777777777777, + "loss": 5.7317, + "step": 250 + }, + { + "epoch": 11.02, + "learning_rate": 0.0005, + "loss": 5.4526, + "step": 300 + }, + { + "epoch": 13.02, + "learning_rate": 0.00047222222222222224, + "loss": 5.246, + "step": 350 + }, + { + "epoch": 15.02, + "learning_rate": 0.00044444444444444436, + "loss": 5.0966, + "step": 400 + }, + { + "epoch": 17.02, + "learning_rate": 0.00041666666666666664, + "loss": 4.9523, + "step": 450 + }, + { + "epoch": 19.02, + "learning_rate": 0.00038888888888888887, + "loss": 4.7992, + "step": 500 + }, + { + "epoch": 21.02, + "learning_rate": 0.0003611111111111111, + "loss": 4.6133, + "step": 550 + }, + { + "epoch": 23.02, + "learning_rate": 0.0003333333333333333, + "loss": 4.438, + "step": 600 + }, + { + "epoch": 25.02, + "learning_rate": 0.00030555555555555555, + "loss": 4.2932, + "step": 650 + }, + { + "epoch": 27.02, + "learning_rate": 0.0002777777777777778, + "loss": 4.1681, + "step": 700 + }, + { + "epoch": 29.02, + "learning_rate": 0.00025, + "loss": 4.0568, + "step": 750 + }, + { + "epoch": 31.02, + "learning_rate": 0.00022222222222222218, + "loss": 3.9603, + "step": 800 + }, + { + "epoch": 33.02, + "learning_rate": 0.00019444444444444443, + "loss": 3.8758, + "step": 850 + }, + { + "epoch": 35.02, + "learning_rate": 0.00016666666666666666, + "loss": 3.8026, + "step": 900 + }, + { + "epoch": 37.02, + "learning_rate": 0.0001388888888888889, + "loss": 3.7385, + "step": 950 + }, + { + "epoch": 39.02, + "learning_rate": 0.00011111111111111109, + "loss": 3.6821, + "step": 1000 + }, + { + "epoch": 39.02, + "eval_loss": 4.709035396575928, + "eval_runtime": 5.2665, + "eval_samples_per_second": 18.038, + "eval_steps_per_second": 1.139, + "step": 1000 + }, + { + "epoch": 39.02, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 4.709035396575928, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 110.94509026017886, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 5.2665, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 18.038, + "step": 1000 + }, + { + "epoch": 41.02, + "learning_rate": 8.333333333333333e-05, + "loss": 3.6317, + "step": 1050 + }, + { + "epoch": 43.02, + "learning_rate": 5.5555555555555545e-05, + "loss": 3.5903, + "step": 1100 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.9858885664768e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1100/training_args.bin b/checkpoint-1100/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..cd0f8832699caf9cedfc54e6ee63cf2d7599438b --- /dev/null +++ b/checkpoint-1100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:707cc1b4962335ce6a2833d97d3275a2adb3ffdc23fb0d423b7d2cbc9c573922 +size 3183 diff --git a/checkpoint-1200/config.json b/checkpoint-1200/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-1200/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-1200/optimizer.pt b/checkpoint-1200/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..4b5576b298bbf2bb3137d0584bac290c0c2a1baa --- /dev/null +++ b/checkpoint-1200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:496d4eebfa239f906b78a7c8aa43843db0d9d3c512f0f94079e8b7463274a72e +size 995604017 diff --git a/checkpoint-1200/pytorch_model.bin b/checkpoint-1200/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..868001a08109980517cb632374c68f5e59faeab9 --- /dev/null +++ b/checkpoint-1200/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e8b835e191ab1ccba6197d07ba28e4b9bc4661f01c6d6948073e0f35780f63e +size 510396521 diff --git a/checkpoint-1200/rng_state.pth b/checkpoint-1200/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..f7b16c4797ba41712e0d2f2849765ff94241a6dc --- /dev/null +++ b/checkpoint-1200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f43dc08e39a04f765530cec7b01e506a90c6a0abdf77d9f84bc1bac282c67b0f +size 14567 diff --git a/checkpoint-1200/scaler.pt b/checkpoint-1200/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..8953dddccbefc4703c09dcda27d83c15add2bade --- /dev/null +++ b/checkpoint-1200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19c7277eaca0850ae3e9b6790b3d002d820169cce0671185e672c28c8ae8e056 +size 559 diff --git a/checkpoint-1200/scheduler.pt b/checkpoint-1200/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..310d39c17fc616a9c83286ed00f0f4cefba9f5df --- /dev/null +++ b/checkpoint-1200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:935a8fb09a6e9698d9894853b05e181b3f56098deaaecddde08e55f06bf000c4 +size 623 diff --git a/checkpoint-1200/special_tokens_map.json b/checkpoint-1200/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1200/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1200/tokenizer_config.json b/checkpoint-1200/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1200/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1200/trainer_state.json b/checkpoint-1200/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..dc769821821555e2f6ad59f72d5b07b3d43560cc --- /dev/null +++ b/checkpoint-1200/trainer_state.json @@ -0,0 +1,182 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 47.020833333333336, + "global_step": 1200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.9503, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.4039, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.9664, + "step": 100 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005833333333333333, + "loss": 6.5622, + "step": 150 + }, + { + "epoch": 7.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.0975, + "step": 200 + }, + { + "epoch": 9.02, + "learning_rate": 0.0005277777777777777, + "loss": 5.7317, + "step": 250 + }, + { + "epoch": 11.02, + "learning_rate": 0.0005, + "loss": 5.4526, + "step": 300 + }, + { + "epoch": 13.02, + "learning_rate": 0.00047222222222222224, + "loss": 5.246, + "step": 350 + }, + { + "epoch": 15.02, + "learning_rate": 0.00044444444444444436, + "loss": 5.0966, + "step": 400 + }, + { + "epoch": 17.02, + "learning_rate": 0.00041666666666666664, + "loss": 4.9523, + "step": 450 + }, + { + "epoch": 19.02, + "learning_rate": 0.00038888888888888887, + "loss": 4.7992, + "step": 500 + }, + { + "epoch": 21.02, + "learning_rate": 0.0003611111111111111, + "loss": 4.6133, + "step": 550 + }, + { + "epoch": 23.02, + "learning_rate": 0.0003333333333333333, + "loss": 4.438, + "step": 600 + }, + { + "epoch": 25.02, + "learning_rate": 0.00030555555555555555, + "loss": 4.2932, + "step": 650 + }, + { + "epoch": 27.02, + "learning_rate": 0.0002777777777777778, + "loss": 4.1681, + "step": 700 + }, + { + "epoch": 29.02, + "learning_rate": 0.00025, + "loss": 4.0568, + "step": 750 + }, + { + "epoch": 31.02, + "learning_rate": 0.00022222222222222218, + "loss": 3.9603, + "step": 800 + }, + { + "epoch": 33.02, + "learning_rate": 0.00019444444444444443, + "loss": 3.8758, + "step": 850 + }, + { + "epoch": 35.02, + "learning_rate": 0.00016666666666666666, + "loss": 3.8026, + "step": 900 + }, + { + "epoch": 37.02, + "learning_rate": 0.0001388888888888889, + "loss": 3.7385, + "step": 950 + }, + { + "epoch": 39.02, + "learning_rate": 0.00011111111111111109, + "loss": 3.6821, + "step": 1000 + }, + { + "epoch": 39.02, + "eval_loss": 4.709035396575928, + "eval_runtime": 5.2665, + "eval_samples_per_second": 18.038, + "eval_steps_per_second": 1.139, + "step": 1000 + }, + { + "epoch": 39.02, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 4.709035396575928, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 110.94509026017886, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 5.2665, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 18.038, + "step": 1000 + }, + { + "epoch": 41.02, + "learning_rate": 8.333333333333333e-05, + "loss": 3.6317, + "step": 1050 + }, + { + "epoch": 43.02, + "learning_rate": 5.5555555555555545e-05, + "loss": 3.5903, + "step": 1100 + }, + { + "epoch": 45.02, + "learning_rate": 2.7777777777777772e-05, + "loss": 3.5567, + "step": 1150 + }, + { + "epoch": 47.02, + "learning_rate": 0.0, + "loss": 3.533, + "step": 1200 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 3.2574232461312e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1200/training_args.bin b/checkpoint-1200/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..cd0f8832699caf9cedfc54e6ee63cf2d7599438b --- /dev/null +++ b/checkpoint-1200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:707cc1b4962335ce6a2833d97d3275a2adb3ffdc23fb0d423b7d2cbc9c573922 +size 3183 diff --git a/checkpoint-200/config.json b/checkpoint-200/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-200/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-200/optimizer.pt b/checkpoint-200/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..7e9a3215a4d46959944f3cca0edf719cb027e02b --- /dev/null +++ b/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:333a00a2b024238a8f18f2b3333e40ca90abb1c46654f1d346df6076de55d405 +size 995603825 diff --git a/checkpoint-200/pytorch_model.bin b/checkpoint-200/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..46ca06a764a4a953a2aee90cb572722c6add488e --- /dev/null +++ b/checkpoint-200/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29a6928c24fb91aa5d6852d51c2925ef943fdb1a349104b09bbaf7d4135bb2ac +size 510396521 diff --git a/checkpoint-200/rng_state.pth b/checkpoint-200/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..0ad298fe14b0cfc4ee3756f8bbc895c8e23eaab3 --- /dev/null +++ b/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e010992738a85ab2e9b35c007f4a8bb110047a9f625882b93fba7cd0170602ea +size 14567 diff --git a/checkpoint-200/scaler.pt b/checkpoint-200/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..365b52ebf376498237a843f6d7332e5a49b14902 --- /dev/null +++ b/checkpoint-200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb6982c29cd162f49aeb531674acf574eccd46a8f556bec596040d7c3b95200a +size 559 diff --git a/checkpoint-200/scheduler.pt b/checkpoint-200/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..36cb8da739c80a63971a62f06f781c40ac0fceb2 --- /dev/null +++ b/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a992625ada6d884e508ff9392d16738b4a4163147f8fcbf9f46be82ecae9888 +size 623 diff --git a/checkpoint-200/special_tokens_map.json b/checkpoint-200/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-200/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-200/tokenizer_config.json b/checkpoint-200/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-200/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-200/trainer_state.json b/checkpoint-200/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..8c704d0c2b9bea7d98c4f09a547e77e2fe9f9747 --- /dev/null +++ b/checkpoint-200/trainer_state.json @@ -0,0 +1,46 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.020833333333333, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.9503, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.4039, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.9664, + "step": 100 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005833333333333333, + "loss": 6.5622, + "step": 150 + }, + { + "epoch": 7.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.0975, + "step": 200 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 5.420764495872e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-200/training_args.bin b/checkpoint-200/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..cd0f8832699caf9cedfc54e6ee63cf2d7599438b --- /dev/null +++ b/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:707cc1b4962335ce6a2833d97d3275a2adb3ffdc23fb0d423b7d2cbc9c573922 +size 3183 diff --git a/checkpoint-300/config.json b/checkpoint-300/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-300/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-300/optimizer.pt b/checkpoint-300/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..252e5429de992b1502c4ca98f92cf2fd0a49ab70 --- /dev/null +++ b/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1303de96dcb196ded562dffb433cc793b4f6524fb1c26f9a1f233f99d289c9b0 +size 995604017 diff --git a/checkpoint-300/pytorch_model.bin b/checkpoint-300/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..fc5d0f1dd858f1a00e5e7069d9f3975b2efaec84 --- /dev/null +++ b/checkpoint-300/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ccaa4ffe971bea6843ec245c38ce0ae6413f5722e352278777e6b9700f3b163 +size 510396521 diff --git a/checkpoint-300/rng_state.pth b/checkpoint-300/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..20ddec6932093c74b8214719cf4dd40595da6032 --- /dev/null +++ b/checkpoint-300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f80c7292e6764c23484b48fd3f16d779cb38ff085ad505e9311a48941cec2474 +size 14567 diff --git a/checkpoint-300/scaler.pt b/checkpoint-300/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..01066cf4761ea9d2f7962f5181762f7b08690b79 --- /dev/null +++ b/checkpoint-300/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0967b9f865f16344c55f5ccc3cf7d6e8e97ca61dda304e931ca6bad130f48dd1 +size 559 diff --git a/checkpoint-300/scheduler.pt b/checkpoint-300/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..681987f21e79cd4685aac32a5e6b74341e25d936 --- /dev/null +++ b/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1db899b266916f792a0898ceb27a87eaf76647f10c29cc0c13ce22f12a12efd +size 623 diff --git a/checkpoint-300/special_tokens_map.json b/checkpoint-300/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-300/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-300/tokenizer_config.json b/checkpoint-300/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-300/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-300/trainer_state.json b/checkpoint-300/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..fffe9860c906fcdcd8218e71096413d464ba1854 --- /dev/null +++ b/checkpoint-300/trainer_state.json @@ -0,0 +1,58 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 11.020833333333334, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.9503, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.4039, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.9664, + "step": 100 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005833333333333333, + "loss": 6.5622, + "step": 150 + }, + { + "epoch": 7.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.0975, + "step": 200 + }, + { + "epoch": 9.02, + "learning_rate": 0.0005277777777777777, + "loss": 5.7317, + "step": 250 + }, + { + "epoch": 11.02, + "learning_rate": 0.0005, + "loss": 5.4526, + "step": 300 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 8.136111292416e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-300/training_args.bin b/checkpoint-300/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..cd0f8832699caf9cedfc54e6ee63cf2d7599438b --- /dev/null +++ b/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:707cc1b4962335ce6a2833d97d3275a2adb3ffdc23fb0d423b7d2cbc9c573922 +size 3183 diff --git a/checkpoint-400/config.json b/checkpoint-400/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-400/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-400/optimizer.pt b/checkpoint-400/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..5af708eea166f62a9234505b1734932c5e28f529 --- /dev/null +++ b/checkpoint-400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc0a9df2d2a72ac394dbe7feb7d22d37416fcf0e57d70f5b66cf8eba9b155a0a +size 995604017 diff --git a/checkpoint-400/pytorch_model.bin b/checkpoint-400/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..2cba0ac10e2fec187eb2539cd042f1bc669377ed --- /dev/null +++ b/checkpoint-400/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:714463cd97f89adbdf5774edeff61b42eb710f28a804d134cf8d85ee9b4e0c0a +size 510396521 diff --git a/checkpoint-400/rng_state.pth b/checkpoint-400/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..e7e2bdf96128a25750920fd2cafff2fd8c9fc6bc --- /dev/null +++ b/checkpoint-400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:893374075be2a95cc9921e6d18c7224e3df1a64e115405788c12616d049ecb0f +size 14567 diff --git a/checkpoint-400/scaler.pt b/checkpoint-400/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..9c7aef4199e98d81152810b661dbaffc01963383 --- /dev/null +++ b/checkpoint-400/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:476e510c8ea7edbd2b51d1e76a4e037820a5639381c0d8b5d32dafa492795a1e +size 559 diff --git a/checkpoint-400/scheduler.pt b/checkpoint-400/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..967673cfc91836d239beb5a7ede06992232ba309 --- /dev/null +++ b/checkpoint-400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db087d678047b5c346bbb8511936612c1fdf223c6fd70321e97369bc31ed76a8 +size 623 diff --git a/checkpoint-400/special_tokens_map.json b/checkpoint-400/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-400/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-400/tokenizer_config.json b/checkpoint-400/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-400/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-400/trainer_state.json b/checkpoint-400/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..071c384e61caf43ed8766120be9d4a061c57c28b --- /dev/null +++ b/checkpoint-400/trainer_state.json @@ -0,0 +1,70 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 15.020833333333334, + "global_step": 400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.9503, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.4039, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.9664, + "step": 100 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005833333333333333, + "loss": 6.5622, + "step": 150 + }, + { + "epoch": 7.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.0975, + "step": 200 + }, + { + "epoch": 9.02, + "learning_rate": 0.0005277777777777777, + "loss": 5.7317, + "step": 250 + }, + { + "epoch": 11.02, + "learning_rate": 0.0005, + "loss": 5.4526, + "step": 300 + }, + { + "epoch": 13.02, + "learning_rate": 0.00047222222222222224, + "loss": 5.246, + "step": 350 + }, + { + "epoch": 15.02, + "learning_rate": 0.00044444444444444436, + "loss": 5.0966, + "step": 400 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.085145808896e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-400/training_args.bin b/checkpoint-400/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..cd0f8832699caf9cedfc54e6ee63cf2d7599438b --- /dev/null +++ b/checkpoint-400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:707cc1b4962335ce6a2833d97d3275a2adb3ffdc23fb0d423b7d2cbc9c573922 +size 3183 diff --git a/checkpoint-500/config.json b/checkpoint-500/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-500/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-500/optimizer.pt b/checkpoint-500/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..c2e72d642ebcdfe4b4c503a68e06890e49365bee --- /dev/null +++ b/checkpoint-500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9907f9d9cc0eb170692577ebf339343ec4b03cbf31cc2cf1a903d85567913d58 +size 995604017 diff --git a/checkpoint-500/pytorch_model.bin b/checkpoint-500/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..0a687722ba42a920be1e087c8eb16f007b6374d9 --- /dev/null +++ b/checkpoint-500/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b11bf938ef4f1fc0d062dd700f2988bcf5992c957104c95ae7e392b9d439c01 +size 510396521 diff --git a/checkpoint-500/rng_state.pth b/checkpoint-500/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..3b54123545ae2cca52b7ff45adaecb8c4fd8cd1d --- /dev/null +++ b/checkpoint-500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:943e6aa2394081e70b938e3bbf8cbe81c78f4f6986533b1a95c497c057d33f05 +size 14567 diff --git a/checkpoint-500/scaler.pt b/checkpoint-500/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..e8b96c9c2837f2c95b1d07d4fc3f245f9ad1ef62 --- /dev/null +++ b/checkpoint-500/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fa4c7be44c959599b8b43bb9bc3371e9e4e5bbc5758b3ab5afcccfda3e72e67 +size 559 diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..77b958181a5b47b022b96b38a6207f274a1b6604 --- /dev/null +++ b/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:026fae4a90d56c24de94b10dfa7a75b6ba4e43bd5c1a3fdb2d77356b81cd6f8a +size 623 diff --git a/checkpoint-500/special_tokens_map.json b/checkpoint-500/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-500/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-500/tokenizer_config.json b/checkpoint-500/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-500/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..3b78573c85424d88b39c3408803bf91350d3e8c4 --- /dev/null +++ b/checkpoint-500/trainer_state.json @@ -0,0 +1,82 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 19.020833333333332, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.9503, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.4039, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.9664, + "step": 100 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005833333333333333, + "loss": 6.5622, + "step": 150 + }, + { + "epoch": 7.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.0975, + "step": 200 + }, + { + "epoch": 9.02, + "learning_rate": 0.0005277777777777777, + "loss": 5.7317, + "step": 250 + }, + { + "epoch": 11.02, + "learning_rate": 0.0005, + "loss": 5.4526, + "step": 300 + }, + { + "epoch": 13.02, + "learning_rate": 0.00047222222222222224, + "loss": 5.246, + "step": 350 + }, + { + "epoch": 15.02, + "learning_rate": 0.00044444444444444436, + "loss": 5.0966, + "step": 400 + }, + { + "epoch": 17.02, + "learning_rate": 0.00041666666666666664, + "loss": 4.9523, + "step": 450 + }, + { + "epoch": 19.02, + "learning_rate": 0.00038888888888888887, + "loss": 4.7992, + "step": 500 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.3566804885504e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..cd0f8832699caf9cedfc54e6ee63cf2d7599438b --- /dev/null +++ b/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:707cc1b4962335ce6a2833d97d3275a2adb3ffdc23fb0d423b7d2cbc9c573922 +size 3183 diff --git a/checkpoint-600/config.json b/checkpoint-600/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-600/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-600/optimizer.pt b/checkpoint-600/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..809f37f43c4d4de1740307dfcee053ac837f445c --- /dev/null +++ b/checkpoint-600/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c957929815376307a7ce68d7207534bb2a33acae262412b30c0233cf4b58721 +size 995604017 diff --git a/checkpoint-600/pytorch_model.bin b/checkpoint-600/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..30317e31d272e55b0e04c9aa94ac1f20054a88be --- /dev/null +++ b/checkpoint-600/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:717b7307c0b0daa5a0ab928273e18ab46c3af75251e37f19359c6b96ecc417ed +size 510396521 diff --git a/checkpoint-600/rng_state.pth b/checkpoint-600/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..7500192da1a3c2344c973a7afc808f33db364599 --- /dev/null +++ b/checkpoint-600/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6515483865af5c4ea0669be7f0664f25852cb2bdaac46bc971136bdb5a4bd69e +size 14567 diff --git a/checkpoint-600/scaler.pt b/checkpoint-600/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..f95b3e36da01561ec333a83ee8419ad225633e06 --- /dev/null +++ b/checkpoint-600/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e8415b86bbce347c0df306b84a695add049c2a3b2d0b6f4dda3bf036d341150 +size 559 diff --git a/checkpoint-600/scheduler.pt b/checkpoint-600/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..62e5359badd619e7dad2c51d98fa8043d9948f0b --- /dev/null +++ b/checkpoint-600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:700940432b1c2117248896e2ce5a58d93c051d92ea97707f74d76bf1ef24deee +size 623 diff --git a/checkpoint-600/special_tokens_map.json b/checkpoint-600/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-600/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-600/tokenizer_config.json b/checkpoint-600/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-600/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-600/trainer_state.json b/checkpoint-600/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..2292b9c26838d4f6b5dcd71726b5cb8bda17a404 --- /dev/null +++ b/checkpoint-600/trainer_state.json @@ -0,0 +1,94 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 23.020833333333332, + "global_step": 600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.9503, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.4039, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.9664, + "step": 100 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005833333333333333, + "loss": 6.5622, + "step": 150 + }, + { + "epoch": 7.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.0975, + "step": 200 + }, + { + "epoch": 9.02, + "learning_rate": 0.0005277777777777777, + "loss": 5.7317, + "step": 250 + }, + { + "epoch": 11.02, + "learning_rate": 0.0005, + "loss": 5.4526, + "step": 300 + }, + { + "epoch": 13.02, + "learning_rate": 0.00047222222222222224, + "loss": 5.246, + "step": 350 + }, + { + "epoch": 15.02, + "learning_rate": 0.00044444444444444436, + "loss": 5.0966, + "step": 400 + }, + { + "epoch": 17.02, + "learning_rate": 0.00041666666666666664, + "loss": 4.9523, + "step": 450 + }, + { + "epoch": 19.02, + "learning_rate": 0.00038888888888888887, + "loss": 4.7992, + "step": 500 + }, + { + "epoch": 21.02, + "learning_rate": 0.0003611111111111111, + "loss": 4.6133, + "step": 550 + }, + { + "epoch": 23.02, + "learning_rate": 0.0003333333333333333, + "loss": 4.438, + "step": 600 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.6282151682048e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-600/training_args.bin b/checkpoint-600/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..cd0f8832699caf9cedfc54e6ee63cf2d7599438b --- /dev/null +++ b/checkpoint-600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:707cc1b4962335ce6a2833d97d3275a2adb3ffdc23fb0d423b7d2cbc9c573922 +size 3183 diff --git a/checkpoint-700/config.json b/checkpoint-700/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-700/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-700/optimizer.pt b/checkpoint-700/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..a33905c8381a46de59afd337a1fcf418d2862327 --- /dev/null +++ b/checkpoint-700/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c557dc317c7c50ec3aa985d4ddd1a33a2ca1de615a9b8a02d8268b8131f5b9b7 +size 995604017 diff --git a/checkpoint-700/pytorch_model.bin b/checkpoint-700/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..d30bbcf8a1222f9e40c26eb3c3533efb249d7406 --- /dev/null +++ b/checkpoint-700/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df4f35957a89f1d86ddd1c38d3c89bf4c634afaf1fb0b2f3ecd15535e67aa6fa +size 510396521 diff --git a/checkpoint-700/rng_state.pth b/checkpoint-700/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..bd8abefe2e80d0f133f388f0999668dabc1115b6 --- /dev/null +++ b/checkpoint-700/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f87e2271ad81098cb78770c5d2c2d050be57745ec292f0bd4cbf67c90f6176eb +size 14567 diff --git a/checkpoint-700/scaler.pt b/checkpoint-700/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..12f2b8ec834e54a2bd7cfdd0e07b0c6e125b6490 --- /dev/null +++ b/checkpoint-700/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fb213daf5cce18a5f92167ca14da9df084d907f2b9796efc4666630f312b58c +size 559 diff --git a/checkpoint-700/scheduler.pt b/checkpoint-700/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..42408b6253265af34ae78746144fbba9316e0d7e --- /dev/null +++ b/checkpoint-700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2d4910fd408e002ebeff50d62bfb043dcae5ef658777d0c3ee4a3bbb515ec15 +size 623 diff --git a/checkpoint-700/special_tokens_map.json b/checkpoint-700/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-700/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-700/tokenizer_config.json b/checkpoint-700/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-700/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-700/trainer_state.json b/checkpoint-700/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..c4b3c599cb5ab6d5a8c9a17fada6f6605270d675 --- /dev/null +++ b/checkpoint-700/trainer_state.json @@ -0,0 +1,106 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 27.020833333333332, + "global_step": 700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.9503, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.4039, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.9664, + "step": 100 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005833333333333333, + "loss": 6.5622, + "step": 150 + }, + { + "epoch": 7.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.0975, + "step": 200 + }, + { + "epoch": 9.02, + "learning_rate": 0.0005277777777777777, + "loss": 5.7317, + "step": 250 + }, + { + "epoch": 11.02, + "learning_rate": 0.0005, + "loss": 5.4526, + "step": 300 + }, + { + "epoch": 13.02, + "learning_rate": 0.00047222222222222224, + "loss": 5.246, + "step": 350 + }, + { + "epoch": 15.02, + "learning_rate": 0.00044444444444444436, + "loss": 5.0966, + "step": 400 + }, + { + "epoch": 17.02, + "learning_rate": 0.00041666666666666664, + "loss": 4.9523, + "step": 450 + }, + { + "epoch": 19.02, + "learning_rate": 0.00038888888888888887, + "loss": 4.7992, + "step": 500 + }, + { + "epoch": 21.02, + "learning_rate": 0.0003611111111111111, + "loss": 4.6133, + "step": 550 + }, + { + "epoch": 23.02, + "learning_rate": 0.0003333333333333333, + "loss": 4.438, + "step": 600 + }, + { + "epoch": 25.02, + "learning_rate": 0.00030555555555555555, + "loss": 4.2932, + "step": 650 + }, + { + "epoch": 27.02, + "learning_rate": 0.0002777777777777778, + "loss": 4.1681, + "step": 700 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.8997498478592e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-700/training_args.bin b/checkpoint-700/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..cd0f8832699caf9cedfc54e6ee63cf2d7599438b --- /dev/null +++ b/checkpoint-700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:707cc1b4962335ce6a2833d97d3275a2adb3ffdc23fb0d423b7d2cbc9c573922 +size 3183 diff --git a/checkpoint-800/config.json b/checkpoint-800/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-800/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-800/optimizer.pt b/checkpoint-800/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..ca92de01b36166d44a13f0cb41e8edb61cbbdedb --- /dev/null +++ b/checkpoint-800/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98f096e88ff46ff6d658953e9aa524e39ad1c77f5fa6e013b750afcbd9a2aed3 +size 995604017 diff --git a/checkpoint-800/pytorch_model.bin b/checkpoint-800/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..06d6323a5908fcfe2952aed13854f1c28d01cef7 --- /dev/null +++ b/checkpoint-800/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:512f9d8b3cb1a9c8d4cf48b30d808146dde594a20eeafa283bf806a5d90d4f4f +size 510396521 diff --git a/checkpoint-800/rng_state.pth b/checkpoint-800/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..b5c84f7d9f6c758334c190945725bb4514389511 --- /dev/null +++ b/checkpoint-800/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:435697031e13ffe8dd4cfb9d343ce2d76743e839f114095ee6229db984719dcc +size 14567 diff --git a/checkpoint-800/scaler.pt b/checkpoint-800/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..b3c73372264156b02df8dada2192ee3c96dd5fc4 --- /dev/null +++ b/checkpoint-800/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c2074cdcefbaa0a39f736d6b0f7bf018c350d49e85648bc8accc4f756ad816e +size 559 diff --git a/checkpoint-800/scheduler.pt b/checkpoint-800/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..75713dbce3771306ca00343ecc497c4f19a01d03 --- /dev/null +++ b/checkpoint-800/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b27fb255c84833fb6ab5d93679cb236a569de9a1c4f805f72a2f60a2bc7c7499 +size 623 diff --git a/checkpoint-800/special_tokens_map.json b/checkpoint-800/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-800/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-800/tokenizer_config.json b/checkpoint-800/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-800/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-800/trainer_state.json b/checkpoint-800/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..797c1ac706ca2c433278c1ab1116a4ed96dac1b6 --- /dev/null +++ b/checkpoint-800/trainer_state.json @@ -0,0 +1,118 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 31.020833333333332, + "global_step": 800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.9503, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.4039, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.9664, + "step": 100 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005833333333333333, + "loss": 6.5622, + "step": 150 + }, + { + "epoch": 7.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.0975, + "step": 200 + }, + { + "epoch": 9.02, + "learning_rate": 0.0005277777777777777, + "loss": 5.7317, + "step": 250 + }, + { + "epoch": 11.02, + "learning_rate": 0.0005, + "loss": 5.4526, + "step": 300 + }, + { + "epoch": 13.02, + "learning_rate": 0.00047222222222222224, + "loss": 5.246, + "step": 350 + }, + { + "epoch": 15.02, + "learning_rate": 0.00044444444444444436, + "loss": 5.0966, + "step": 400 + }, + { + "epoch": 17.02, + "learning_rate": 0.00041666666666666664, + "loss": 4.9523, + "step": 450 + }, + { + "epoch": 19.02, + "learning_rate": 0.00038888888888888887, + "loss": 4.7992, + "step": 500 + }, + { + "epoch": 21.02, + "learning_rate": 0.0003611111111111111, + "loss": 4.6133, + "step": 550 + }, + { + "epoch": 23.02, + "learning_rate": 0.0003333333333333333, + "loss": 4.438, + "step": 600 + }, + { + "epoch": 25.02, + "learning_rate": 0.00030555555555555555, + "loss": 4.2932, + "step": 650 + }, + { + "epoch": 27.02, + "learning_rate": 0.0002777777777777778, + "loss": 4.1681, + "step": 700 + }, + { + "epoch": 29.02, + "learning_rate": 0.00025, + "loss": 4.0568, + "step": 750 + }, + { + "epoch": 31.02, + "learning_rate": 0.00022222222222222218, + "loss": 3.9603, + "step": 800 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.1712845275136e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-800/training_args.bin b/checkpoint-800/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..cd0f8832699caf9cedfc54e6ee63cf2d7599438b --- /dev/null +++ b/checkpoint-800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:707cc1b4962335ce6a2833d97d3275a2adb3ffdc23fb0d423b7d2cbc9c573922 +size 3183 diff --git a/checkpoint-900/config.json b/checkpoint-900/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-900/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-900/optimizer.pt b/checkpoint-900/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..f23bf806f8a8950cc3a0f0789ee9a38195b22107 --- /dev/null +++ b/checkpoint-900/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b2e94cda6e3b5f16051a12e0e12dbe8e8610df1159a1540c899ae50025fde64 +size 995604017 diff --git a/checkpoint-900/pytorch_model.bin b/checkpoint-900/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..1a30f00ce44d7a85ecd00d0fbea784ecb60b4254 --- /dev/null +++ b/checkpoint-900/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f64c1bef64edd735e67cd30eabb69ee09f001ede70aa29842f98bfcc6c7722e7 +size 510396521 diff --git a/checkpoint-900/rng_state.pth b/checkpoint-900/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..1713ae8cb1fd31a31108e0a80b16ecacbe16fe04 --- /dev/null +++ b/checkpoint-900/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00bb6c475c9ad4f3dbe1bfa5f31bcf2de63cdb3fdb1cbbf7d822107c7cd4d6f5 +size 14567 diff --git a/checkpoint-900/scaler.pt b/checkpoint-900/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..be54cb13c777bc6feccb478ff218e7e21fad482a --- /dev/null +++ b/checkpoint-900/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8695f57df923e22b943b0b0f2b9cc7007008e80b53ccee275b3a35963fe67e9 +size 559 diff --git a/checkpoint-900/scheduler.pt b/checkpoint-900/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..539d7c83ea252818dc9cbffac08cf340bb05a454 --- /dev/null +++ b/checkpoint-900/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2cb09e5db72772a15094286e93cbb61d745d9b63863703cf53da0bcb9827821 +size 623 diff --git a/checkpoint-900/special_tokens_map.json b/checkpoint-900/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-900/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-900/tokenizer_config.json b/checkpoint-900/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-900/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-900/trainer_state.json b/checkpoint-900/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..1b230f0c700561fa66b287638226edc09cd90286 --- /dev/null +++ b/checkpoint-900/trainer_state.json @@ -0,0 +1,130 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 35.020833333333336, + "global_step": 900, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.9503, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.4039, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.9664, + "step": 100 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005833333333333333, + "loss": 6.5622, + "step": 150 + }, + { + "epoch": 7.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.0975, + "step": 200 + }, + { + "epoch": 9.02, + "learning_rate": 0.0005277777777777777, + "loss": 5.7317, + "step": 250 + }, + { + "epoch": 11.02, + "learning_rate": 0.0005, + "loss": 5.4526, + "step": 300 + }, + { + "epoch": 13.02, + "learning_rate": 0.00047222222222222224, + "loss": 5.246, + "step": 350 + }, + { + "epoch": 15.02, + "learning_rate": 0.00044444444444444436, + "loss": 5.0966, + "step": 400 + }, + { + "epoch": 17.02, + "learning_rate": 0.00041666666666666664, + "loss": 4.9523, + "step": 450 + }, + { + "epoch": 19.02, + "learning_rate": 0.00038888888888888887, + "loss": 4.7992, + "step": 500 + }, + { + "epoch": 21.02, + "learning_rate": 0.0003611111111111111, + "loss": 4.6133, + "step": 550 + }, + { + "epoch": 23.02, + "learning_rate": 0.0003333333333333333, + "loss": 4.438, + "step": 600 + }, + { + "epoch": 25.02, + "learning_rate": 0.00030555555555555555, + "loss": 4.2932, + "step": 650 + }, + { + "epoch": 27.02, + "learning_rate": 0.0002777777777777778, + "loss": 4.1681, + "step": 700 + }, + { + "epoch": 29.02, + "learning_rate": 0.00025, + "loss": 4.0568, + "step": 750 + }, + { + "epoch": 31.02, + "learning_rate": 0.00022222222222222218, + "loss": 3.9603, + "step": 800 + }, + { + "epoch": 33.02, + "learning_rate": 0.00019444444444444443, + "loss": 3.8758, + "step": 850 + }, + { + "epoch": 35.02, + "learning_rate": 0.00016666666666666666, + "loss": 3.8026, + "step": 900 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.442819207168e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-900/training_args.bin b/checkpoint-900/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..cd0f8832699caf9cedfc54e6ee63cf2d7599438b --- /dev/null +++ b/checkpoint-900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:707cc1b4962335ce6a2833d97d3275a2adb3ffdc23fb0d423b7d2cbc9c573922 +size 3183 diff --git a/config.json b/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/metrics.json b/metrics.json new file mode 100755 index 0000000000000000000000000000000000000000..8a6cd42a1449d49b1a5235e6a1d633e5e18c3c09 --- /dev/null +++ b/metrics.json @@ -0,0 +1,2508 @@ +{"num_parameters": 124439808, "trainable_parameters": 124439808, "step": 0} +{"train_info/time_between_train_steps": 4.231281042098999, "step": 0} +{"info/global_step": 1, "train_info/time_within_train_step": 28.67011260986328, "step": 1} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 19761.71484375, "train_info/memory_reserved": 22624.0, "train_info/memory_max_reserved": 22624.0, "_timestamp": 1736746899, "_runtime": 59}, "step": 1} +{"logs": {"train/loss": 10.9503, "train/learning_rate": 4.9999999999999996e-06, "train/epoch": 0.0, "_timestamp": 1736746899, "_runtime": 59}, "step": 1} +{"train_info/time_between_train_steps": 0.0343930721282959, "step": 1} +{"info/global_step": 2, "train_info/time_within_train_step": 27.98525643348694, "step": 2} +{"train_info/time_between_train_steps": 0.005816936492919922, "step": 2} +{"info/global_step": 3, "train_info/time_within_train_step": 27.756082773208618, "step": 3} +{"train_info/time_between_train_steps": 0.00586247444152832, "step": 3} +{"info/global_step": 4, "train_info/time_within_train_step": 27.91880750656128, "step": 4} +{"train_info/time_between_train_steps": 0.005884885787963867, "step": 4} +{"info/global_step": 5, "train_info/time_within_train_step": 27.77273416519165, "step": 5} +{"train_info/time_between_train_steps": 0.0058171749114990234, "step": 5} +{"info/global_step": 6, "train_info/time_within_train_step": 27.835901260375977, "step": 6} +{"train_info/time_between_train_steps": 0.010787248611450195, "step": 6} +{"info/global_step": 7, "train_info/time_within_train_step": 27.74529457092285, "step": 7} +{"train_info/time_between_train_steps": 0.0054013729095458984, "step": 7} +{"info/global_step": 8, "train_info/time_within_train_step": 27.792829275131226, "step": 8} +{"train_info/time_between_train_steps": 0.0057599544525146484, "step": 8} +{"info/global_step": 9, "train_info/time_within_train_step": 27.774235010147095, "step": 9} +{"train_info/time_between_train_steps": 0.005511760711669922, "step": 9} +{"info/global_step": 10, "train_info/time_within_train_step": 27.750837087631226, "step": 10} +{"train_info/time_between_train_steps": 0.005800724029541016, "step": 10} +{"info/global_step": 11, "train_info/time_within_train_step": 27.732007265090942, "step": 11} +{"train_info/time_between_train_steps": 0.005982637405395508, "step": 11} +{"info/global_step": 12, "train_info/time_within_train_step": 27.747387886047363, "step": 12} +{"train_info/time_between_train_steps": 0.00567317008972168, "step": 12} +{"info/global_step": 13, "train_info/time_within_train_step": 27.767855167388916, "step": 13} +{"train_info/time_between_train_steps": 0.005499601364135742, "step": 13} +{"info/global_step": 14, "train_info/time_within_train_step": 27.738843202590942, "step": 14} +{"train_info/time_between_train_steps": 0.005648136138916016, "step": 14} +{"info/global_step": 15, "train_info/time_within_train_step": 27.778737545013428, "step": 15} +{"train_info/time_between_train_steps": 0.00679469108581543, "step": 15} +{"info/global_step": 16, "train_info/time_within_train_step": 27.87007212638855, "step": 16} +{"train_info/time_between_train_steps": 0.005956172943115234, "step": 16} +{"info/global_step": 17, "train_info/time_within_train_step": 27.768137216567993, "step": 17} +{"train_info/time_between_train_steps": 0.005670309066772461, "step": 17} +{"info/global_step": 18, "train_info/time_within_train_step": 27.759645700454712, "step": 18} +{"train_info/time_between_train_steps": 0.005693912506103516, "step": 18} +{"info/global_step": 19, "train_info/time_within_train_step": 27.78532838821411, "step": 19} +{"train_info/time_between_train_steps": 0.00569462776184082, "step": 19} +{"info/global_step": 20, "train_info/time_within_train_step": 27.767991542816162, "step": 20} +{"train_info/time_between_train_steps": 0.005698442459106445, "step": 20} +{"info/global_step": 21, "train_info/time_within_train_step": 27.79796004295349, "step": 21} +{"train_info/time_between_train_steps": 0.006039619445800781, "step": 21} +{"info/global_step": 22, "train_info/time_within_train_step": 27.761291027069092, "step": 22} +{"train_info/time_between_train_steps": 0.006047248840332031, "step": 22} +{"info/global_step": 23, "train_info/time_within_train_step": 27.74437427520752, "step": 23} +{"train_info/time_between_train_steps": 0.006060361862182617, "step": 23} +{"info/global_step": 24, "train_info/time_within_train_step": 27.761541843414307, "step": 24} +{"train_info/time_between_train_steps": 0.006154537200927734, "step": 24} +{"info/global_step": 25, "train_info/time_within_train_step": 27.825977325439453, "step": 25} +{"train_info/time_between_train_steps": 0.01197504997253418, "step": 25} +{"train_info/time_between_train_steps": 13.612655401229858, "step": 25} +{"info/global_step": 26, "train_info/time_within_train_step": 27.964868307113647, "step": 26} +{"train_info/time_between_train_steps": 0.005403757095336914, "step": 26} +{"info/global_step": 27, "train_info/time_within_train_step": 27.94994592666626, "step": 27} +{"train_info/time_between_train_steps": 0.005789279937744141, "step": 27} +{"info/global_step": 28, "train_info/time_within_train_step": 27.792983293533325, "step": 28} +{"train_info/time_between_train_steps": 0.005758762359619141, "step": 28} +{"info/global_step": 29, "train_info/time_within_train_step": 27.94905662536621, "step": 29} +{"train_info/time_between_train_steps": 0.008152484893798828, "step": 29} +{"info/global_step": 30, "train_info/time_within_train_step": 27.770328998565674, "step": 30} +{"train_info/time_between_train_steps": 0.006259441375732422, "step": 30} +{"info/global_step": 31, "train_info/time_within_train_step": 27.909087896347046, "step": 31} +{"train_info/time_between_train_steps": 0.005600452423095703, "step": 31} +{"info/global_step": 32, "train_info/time_within_train_step": 27.734764099121094, "step": 32} +{"train_info/time_between_train_steps": 0.010912418365478516, "step": 32} +{"info/global_step": 33, "train_info/time_within_train_step": 27.735422372817993, "step": 33} +{"train_info/time_between_train_steps": 0.010901212692260742, "step": 33} +{"info/global_step": 34, "train_info/time_within_train_step": 27.76761269569397, "step": 34} +{"train_info/time_between_train_steps": 0.005789279937744141, "step": 34} +{"info/global_step": 35, "train_info/time_within_train_step": 27.804534196853638, "step": 35} +{"train_info/time_between_train_steps": 0.005586862564086914, "step": 35} +{"info/global_step": 36, "train_info/time_within_train_step": 27.7710223197937, "step": 36} +{"train_info/time_between_train_steps": 0.005697965621948242, "step": 36} +{"info/global_step": 37, "train_info/time_within_train_step": 27.740391731262207, "step": 37} +{"train_info/time_between_train_steps": 0.005646467208862305, "step": 37} +{"info/global_step": 38, "train_info/time_within_train_step": 27.800096035003662, "step": 38} +{"train_info/time_between_train_steps": 0.005815744400024414, "step": 38} +{"info/global_step": 39, "train_info/time_within_train_step": 27.796868562698364, "step": 39} +{"train_info/time_between_train_steps": 0.005811214447021484, "step": 39} +{"info/global_step": 40, "train_info/time_within_train_step": 27.83417320251465, "step": 40} +{"train_info/time_between_train_steps": 0.0057735443115234375, "step": 40} +{"info/global_step": 41, "train_info/time_within_train_step": 27.811189889907837, "step": 41} +{"train_info/time_between_train_steps": 0.005726337432861328, "step": 41} +{"info/global_step": 42, "train_info/time_within_train_step": 27.771095752716064, "step": 42} +{"train_info/time_between_train_steps": 0.005793571472167969, "step": 42} +{"info/global_step": 43, "train_info/time_within_train_step": 27.767019271850586, "step": 43} +{"train_info/time_between_train_steps": 0.005795955657958984, "step": 43} +{"info/global_step": 44, "train_info/time_within_train_step": 27.776472806930542, "step": 44} +{"train_info/time_between_train_steps": 0.005685329437255859, "step": 44} +{"info/global_step": 45, "train_info/time_within_train_step": 27.7679762840271, "step": 45} +{"train_info/time_between_train_steps": 0.005784511566162109, "step": 45} +{"info/global_step": 46, "train_info/time_within_train_step": 27.78765630722046, "step": 46} +{"train_info/time_between_train_steps": 0.006132364273071289, "step": 46} +{"info/global_step": 47, "train_info/time_within_train_step": 27.84262752532959, "step": 47} +{"train_info/time_between_train_steps": 0.006243228912353516, "step": 47} +{"info/global_step": 48, "train_info/time_within_train_step": 27.7929527759552, "step": 48} +{"train_info/time_between_train_steps": 0.005990028381347656, "step": 48} +{"info/global_step": 49, "train_info/time_within_train_step": 27.79917550086975, "step": 49} +{"train_info/time_between_train_steps": 0.006192684173583984, "step": 49} +{"info/global_step": 50, "train_info/time_within_train_step": 27.77308416366577, "step": 50} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736748277, "_runtime": 1437}, "step": 50} +{"logs": {"train/loss": 8.4039, "train/learning_rate": 0.00025, "train/epoch": 1.02, "_timestamp": 1736748277, "_runtime": 1437}, "step": 50} +{"train_info/time_between_train_steps": 0.05469942092895508, "step": 50} +{"train_info/time_between_train_steps": 13.575218439102173, "step": 50} +{"info/global_step": 51, "train_info/time_within_train_step": 27.74161720275879, "step": 51} +{"train_info/time_between_train_steps": 0.005860805511474609, "step": 51} +{"info/global_step": 52, "train_info/time_within_train_step": 27.953779697418213, "step": 52} +{"train_info/time_between_train_steps": 0.0058176517486572266, "step": 52} +{"info/global_step": 53, "train_info/time_within_train_step": 27.763532161712646, "step": 53} +{"train_info/time_between_train_steps": 0.005667924880981445, "step": 53} +{"info/global_step": 54, "train_info/time_within_train_step": 27.938002347946167, "step": 54} +{"train_info/time_between_train_steps": 0.0058782100677490234, "step": 54} +{"info/global_step": 55, "train_info/time_within_train_step": 27.76921796798706, "step": 55} +{"train_info/time_between_train_steps": 0.005616188049316406, "step": 55} +{"info/global_step": 56, "train_info/time_within_train_step": 27.790464878082275, "step": 56} +{"train_info/time_between_train_steps": 0.005444765090942383, "step": 56} +{"info/global_step": 57, "train_info/time_within_train_step": 27.849008083343506, "step": 57} +{"train_info/time_between_train_steps": 0.005512237548828125, "step": 57} +{"info/global_step": 58, "train_info/time_within_train_step": 27.804283380508423, "step": 58} +{"train_info/time_between_train_steps": 0.00554347038269043, "step": 58} +{"info/global_step": 59, "train_info/time_within_train_step": 27.832448720932007, "step": 59} +{"train_info/time_between_train_steps": 0.005629062652587891, "step": 59} +{"info/global_step": 60, "train_info/time_within_train_step": 27.759270429611206, "step": 60} +{"train_info/time_between_train_steps": 0.0107574462890625, "step": 60} +{"info/global_step": 61, "train_info/time_within_train_step": 27.807969331741333, "step": 61} +{"train_info/time_between_train_steps": 0.005522012710571289, "step": 61} +{"info/global_step": 62, "train_info/time_within_train_step": 27.846227169036865, "step": 62} +{"train_info/time_between_train_steps": 0.005743265151977539, "step": 62} +{"info/global_step": 63, "train_info/time_within_train_step": 27.777459383010864, "step": 63} +{"train_info/time_between_train_steps": 0.010764598846435547, "step": 63} +{"info/global_step": 64, "train_info/time_within_train_step": 27.784525156021118, "step": 64} +{"train_info/time_between_train_steps": 0.010919809341430664, "step": 64} +{"info/global_step": 65, "train_info/time_within_train_step": 27.755019664764404, "step": 65} +{"train_info/time_between_train_steps": 0.005559682846069336, "step": 65} +{"info/global_step": 66, "train_info/time_within_train_step": 27.768386363983154, "step": 66} +{"train_info/time_between_train_steps": 0.0065724849700927734, "step": 66} +{"info/global_step": 67, "train_info/time_within_train_step": 27.792860507965088, "step": 67} +{"train_info/time_between_train_steps": 0.005805492401123047, "step": 67} +{"info/global_step": 68, "train_info/time_within_train_step": 27.75765824317932, "step": 68} +{"train_info/time_between_train_steps": 0.005800008773803711, "step": 68} +{"info/global_step": 69, "train_info/time_within_train_step": 27.789013624191284, "step": 69} +{"train_info/time_between_train_steps": 0.0058650970458984375, "step": 69} +{"info/global_step": 70, "train_info/time_within_train_step": 27.77230978012085, "step": 70} +{"train_info/time_between_train_steps": 0.006785154342651367, "step": 70} +{"info/global_step": 71, "train_info/time_within_train_step": 27.806419372558594, "step": 71} +{"train_info/time_between_train_steps": 0.005834817886352539, "step": 71} +{"info/global_step": 72, "train_info/time_within_train_step": 27.822138786315918, "step": 72} +{"train_info/time_between_train_steps": 0.0061190128326416016, "step": 72} +{"info/global_step": 73, "train_info/time_within_train_step": 27.805681467056274, "step": 73} +{"train_info/time_between_train_steps": 0.00590205192565918, "step": 73} +{"info/global_step": 74, "train_info/time_within_train_step": 27.837265253067017, "step": 74} +{"train_info/time_between_train_steps": 0.006236553192138672, "step": 74} +{"info/global_step": 75, "train_info/time_within_train_step": 27.814294576644897, "step": 75} +{"train_info/time_between_train_steps": 0.006262779235839844, "step": 75} +{"train_info/time_between_train_steps": 13.850070238113403, "step": 75} +{"info/global_step": 76, "train_info/time_within_train_step": 27.8278169631958, "step": 76} +{"train_info/time_between_train_steps": 0.006022453308105469, "step": 76} +{"info/global_step": 77, "train_info/time_within_train_step": 27.99466896057129, "step": 77} +{"train_info/time_between_train_steps": 0.005764484405517578, "step": 77} +{"info/global_step": 78, "train_info/time_within_train_step": 27.789047718048096, "step": 78} +{"train_info/time_between_train_steps": 0.006006956100463867, "step": 78} +{"info/global_step": 79, "train_info/time_within_train_step": 27.918343782424927, "step": 79} +{"train_info/time_between_train_steps": 0.005774497985839844, "step": 79} +{"info/global_step": 80, "train_info/time_within_train_step": 27.890490293502808, "step": 80} +{"train_info/time_between_train_steps": 0.0058934688568115234, "step": 80} +{"info/global_step": 81, "train_info/time_within_train_step": 27.800718784332275, "step": 81} +{"train_info/time_between_train_steps": 0.005517005920410156, "step": 81} +{"info/global_step": 82, "train_info/time_within_train_step": 27.78862476348877, "step": 82} +{"train_info/time_between_train_steps": 0.005556583404541016, "step": 82} +{"info/global_step": 83, "train_info/time_within_train_step": 27.804096937179565, "step": 83} +{"train_info/time_between_train_steps": 0.005966663360595703, "step": 83} +{"info/global_step": 84, "train_info/time_within_train_step": 27.899922609329224, "step": 84} +{"train_info/time_between_train_steps": 0.005691051483154297, "step": 84} +{"info/global_step": 85, "train_info/time_within_train_step": 28.02219820022583, "step": 85} +{"train_info/time_between_train_steps": 0.010469436645507812, "step": 85} +{"info/global_step": 86, "train_info/time_within_train_step": 28.08858847618103, "step": 86} +{"train_info/time_between_train_steps": 0.010514974594116211, "step": 86} +{"info/global_step": 87, "train_info/time_within_train_step": 28.114304542541504, "step": 87} +{"train_info/time_between_train_steps": 0.01043248176574707, "step": 87} +{"info/global_step": 88, "train_info/time_within_train_step": 28.138680458068848, "step": 88} +{"train_info/time_between_train_steps": 0.010482549667358398, "step": 88} +{"info/global_step": 89, "train_info/time_within_train_step": 28.083876609802246, "step": 89} +{"train_info/time_between_train_steps": 0.010350465774536133, "step": 89} +{"info/global_step": 90, "train_info/time_within_train_step": 27.91224217414856, "step": 90} +{"train_info/time_between_train_steps": 0.005457162857055664, "step": 90} +{"info/global_step": 91, "train_info/time_within_train_step": 27.802191495895386, "step": 91} +{"train_info/time_between_train_steps": 0.010899782180786133, "step": 91} +{"info/global_step": 92, "train_info/time_within_train_step": 27.793875455856323, "step": 92} +{"train_info/time_between_train_steps": 0.010659456253051758, "step": 92} +{"info/global_step": 93, "train_info/time_within_train_step": 27.899842739105225, "step": 93} +{"train_info/time_between_train_steps": 0.0062160491943359375, "step": 93} +{"info/global_step": 94, "train_info/time_within_train_step": 27.764566898345947, "step": 94} +{"train_info/time_between_train_steps": 0.0056951045989990234, "step": 94} +{"info/global_step": 95, "train_info/time_within_train_step": 27.7702898979187, "step": 95} +{"train_info/time_between_train_steps": 0.005719184875488281, "step": 95} +{"info/global_step": 96, "train_info/time_within_train_step": 27.805593013763428, "step": 96} +{"train_info/time_between_train_steps": 0.00830221176147461, "step": 96} +{"info/global_step": 97, "train_info/time_within_train_step": 27.81998872756958, "step": 97} +{"train_info/time_between_train_steps": 0.007004261016845703, "step": 97} +{"info/global_step": 98, "train_info/time_within_train_step": 27.830317497253418, "step": 98} +{"train_info/time_between_train_steps": 0.008476495742797852, "step": 98} +{"info/global_step": 99, "train_info/time_within_train_step": 27.82971739768982, "step": 99} +{"train_info/time_between_train_steps": 0.005730152130126953, "step": 99} +{"info/global_step": 100, "train_info/time_within_train_step": 27.800907611846924, "step": 100} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736749699, "_runtime": 2859}, "step": 100} +{"logs": {"train/loss": 6.9664, "train/learning_rate": 0.0005, "train/epoch": 3.02, "_timestamp": 1736749699, "_runtime": 2859}, "step": 100} +{"train_info/time_between_train_steps": 125.57121300697327, "step": 100} +{"train_info/time_between_train_steps": 139.57633423805237, "step": 100} +{"info/global_step": 101, "train_info/time_within_train_step": 27.79901123046875, "step": 101} +{"train_info/time_between_train_steps": 0.010436773300170898, "step": 101} +{"info/global_step": 102, "train_info/time_within_train_step": 27.95105266571045, "step": 102} +{"train_info/time_between_train_steps": 0.006609201431274414, "step": 102} +{"info/global_step": 103, "train_info/time_within_train_step": 27.792060375213623, "step": 103} +{"train_info/time_between_train_steps": 0.005762577056884766, "step": 103} +{"info/global_step": 104, "train_info/time_within_train_step": 27.86471652984619, "step": 104} +{"train_info/time_between_train_steps": 0.006424665451049805, "step": 104} +{"info/global_step": 105, "train_info/time_within_train_step": 27.766579389572144, "step": 105} +{"train_info/time_between_train_steps": 0.005938529968261719, "step": 105} +{"info/global_step": 106, "train_info/time_within_train_step": 27.89447593688965, "step": 106} +{"train_info/time_between_train_steps": 0.005514621734619141, "step": 106} +{"info/global_step": 107, "train_info/time_within_train_step": 27.764947175979614, "step": 107} +{"train_info/time_between_train_steps": 0.005559444427490234, "step": 107} +{"info/global_step": 108, "train_info/time_within_train_step": 27.99226713180542, "step": 108} +{"train_info/time_between_train_steps": 0.005822658538818359, "step": 108} +{"info/global_step": 109, "train_info/time_within_train_step": 27.758615016937256, "step": 109} +{"train_info/time_between_train_steps": 0.005583763122558594, "step": 109} +{"info/global_step": 110, "train_info/time_within_train_step": 27.782161474227905, "step": 110} +{"train_info/time_between_train_steps": 0.006979703903198242, "step": 110} +{"info/global_step": 111, "train_info/time_within_train_step": 27.782748222351074, "step": 111} +{"train_info/time_between_train_steps": 0.0055081844329833984, "step": 111} +{"info/global_step": 112, "train_info/time_within_train_step": 27.80831527709961, "step": 112} +{"train_info/time_between_train_steps": 0.005602598190307617, "step": 112} +{"info/global_step": 113, "train_info/time_within_train_step": 28.162217617034912, "step": 113} +{"train_info/time_between_train_steps": 0.005548000335693359, "step": 113} +{"info/global_step": 114, "train_info/time_within_train_step": 27.773748636245728, "step": 114} +{"train_info/time_between_train_steps": 0.005516529083251953, "step": 114} +{"info/global_step": 115, "train_info/time_within_train_step": 27.848867893218994, "step": 115} +{"train_info/time_between_train_steps": 0.010557889938354492, "step": 115} +{"info/global_step": 116, "train_info/time_within_train_step": 27.75695538520813, "step": 116} +{"train_info/time_between_train_steps": 0.006639719009399414, "step": 116} +{"info/global_step": 117, "train_info/time_within_train_step": 27.840136528015137, "step": 117} +{"train_info/time_between_train_steps": 0.005560159683227539, "step": 117} +{"info/global_step": 118, "train_info/time_within_train_step": 27.753881454467773, "step": 118} +{"train_info/time_between_train_steps": 0.005697965621948242, "step": 118} +{"info/global_step": 119, "train_info/time_within_train_step": 27.792354345321655, "step": 119} +{"train_info/time_between_train_steps": 0.005583047866821289, "step": 119} +{"info/global_step": 120, "train_info/time_within_train_step": 27.760254383087158, "step": 120} +{"train_info/time_between_train_steps": 0.005755901336669922, "step": 120} +{"info/global_step": 121, "train_info/time_within_train_step": 27.866248607635498, "step": 121} +{"train_info/time_between_train_steps": 0.005694866180419922, "step": 121} +{"info/global_step": 122, "train_info/time_within_train_step": 27.782791137695312, "step": 122} +{"train_info/time_between_train_steps": 0.005852937698364258, "step": 122} +{"info/global_step": 123, "train_info/time_within_train_step": 27.860573768615723, "step": 123} +{"train_info/time_between_train_steps": 0.00585627555847168, "step": 123} +{"info/global_step": 124, "train_info/time_within_train_step": 27.893499612808228, "step": 124} +{"train_info/time_between_train_steps": 0.006002902984619141, "step": 124} +{"info/global_step": 125, "train_info/time_within_train_step": 27.781188488006592, "step": 125} +{"train_info/time_between_train_steps": 0.006436347961425781, "step": 125} +{"train_info/time_between_train_steps": 13.727400779724121, "step": 125} +{"info/global_step": 126, "train_info/time_within_train_step": 27.756702661514282, "step": 126} +{"train_info/time_between_train_steps": 0.0055272579193115234, "step": 126} +{"info/global_step": 127, "train_info/time_within_train_step": 28.044368028640747, "step": 127} +{"train_info/time_between_train_steps": 0.00558018684387207, "step": 127} +{"info/global_step": 128, "train_info/time_within_train_step": 27.747931241989136, "step": 128} +{"train_info/time_between_train_steps": 0.0059201717376708984, "step": 128} +{"info/global_step": 129, "train_info/time_within_train_step": 27.951457738876343, "step": 129} +{"train_info/time_between_train_steps": 0.00561976432800293, "step": 129} +{"info/global_step": 130, "train_info/time_within_train_step": 27.789060592651367, "step": 130} +{"train_info/time_between_train_steps": 0.005709648132324219, "step": 130} +{"info/global_step": 131, "train_info/time_within_train_step": 27.78397846221924, "step": 131} +{"train_info/time_between_train_steps": 0.0055119991302490234, "step": 131} +{"info/global_step": 132, "train_info/time_within_train_step": 27.73598337173462, "step": 132} +{"train_info/time_between_train_steps": 0.005440473556518555, "step": 132} +{"info/global_step": 133, "train_info/time_within_train_step": 27.744569301605225, "step": 133} +{"train_info/time_between_train_steps": 0.005673408508300781, "step": 133} +{"info/global_step": 134, "train_info/time_within_train_step": 27.97429394721985, "step": 134} +{"train_info/time_between_train_steps": 0.005766868591308594, "step": 134} +{"info/global_step": 135, "train_info/time_within_train_step": 27.748329162597656, "step": 135} +{"train_info/time_between_train_steps": 0.005839109420776367, "step": 135} +{"info/global_step": 136, "train_info/time_within_train_step": 27.858842849731445, "step": 136} +{"train_info/time_between_train_steps": 0.0061986446380615234, "step": 136} +{"info/global_step": 137, "train_info/time_within_train_step": 27.7554988861084, "step": 137} +{"train_info/time_between_train_steps": 0.005785465240478516, "step": 137} +{"info/global_step": 138, "train_info/time_within_train_step": 27.867204904556274, "step": 138} +{"train_info/time_between_train_steps": 0.005693912506103516, "step": 138} +{"info/global_step": 139, "train_info/time_within_train_step": 27.845656871795654, "step": 139} +{"train_info/time_between_train_steps": 0.007787942886352539, "step": 139} +{"info/global_step": 140, "train_info/time_within_train_step": 27.86683678627014, "step": 140} +{"train_info/time_between_train_steps": 0.005660057067871094, "step": 140} +{"info/global_step": 141, "train_info/time_within_train_step": 27.76923394203186, "step": 141} +{"train_info/time_between_train_steps": 0.005766630172729492, "step": 141} +{"info/global_step": 142, "train_info/time_within_train_step": 27.896474361419678, "step": 142} +{"train_info/time_between_train_steps": 0.0058307647705078125, "step": 142} +{"info/global_step": 143, "train_info/time_within_train_step": 27.757416486740112, "step": 143} +{"train_info/time_between_train_steps": 0.010547876358032227, "step": 143} +{"info/global_step": 144, "train_info/time_within_train_step": 27.759366989135742, "step": 144} +{"train_info/time_between_train_steps": 0.0058155059814453125, "step": 144} +{"info/global_step": 145, "train_info/time_within_train_step": 27.906532049179077, "step": 145} +{"train_info/time_between_train_steps": 0.0056536197662353516, "step": 145} +{"info/global_step": 146, "train_info/time_within_train_step": 27.78737235069275, "step": 146} +{"train_info/time_between_train_steps": 0.006066083908081055, "step": 146} +{"info/global_step": 147, "train_info/time_within_train_step": 27.877039194107056, "step": 147} +{"train_info/time_between_train_steps": 0.006112337112426758, "step": 147} +{"info/global_step": 148, "train_info/time_within_train_step": 27.756247758865356, "step": 148} +{"train_info/time_between_train_steps": 0.01109004020690918, "step": 148} +{"info/global_step": 149, "train_info/time_within_train_step": 27.864888191223145, "step": 149} +{"train_info/time_between_train_steps": 0.00630497932434082, "step": 149} +{"info/global_step": 150, "train_info/time_within_train_step": 27.813713550567627, "step": 150} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736751245, "_runtime": 4405}, "step": 150} +{"logs": {"train/loss": 6.5622, "train/learning_rate": 0.0005833333333333333, "train/epoch": 5.02, "_timestamp": 1736751245, "_runtime": 4405}, "step": 150} +{"train_info/time_between_train_steps": 0.0741727352142334, "step": 150} +{"train_info/time_between_train_steps": 13.92063307762146, "step": 150} +{"info/global_step": 151, "train_info/time_within_train_step": 27.80779242515564, "step": 151} +{"train_info/time_between_train_steps": 0.0058062076568603516, "step": 151} +{"info/global_step": 152, "train_info/time_within_train_step": 27.90271520614624, "step": 152} +{"train_info/time_between_train_steps": 0.005911588668823242, "step": 152} +{"info/global_step": 153, "train_info/time_within_train_step": 27.754345893859863, "step": 153} +{"train_info/time_between_train_steps": 0.005574464797973633, "step": 153} +{"info/global_step": 154, "train_info/time_within_train_step": 28.006845235824585, "step": 154} +{"train_info/time_between_train_steps": 0.005815744400024414, "step": 154} +{"info/global_step": 155, "train_info/time_within_train_step": 27.766850471496582, "step": 155} +{"train_info/time_between_train_steps": 0.005838632583618164, "step": 155} +{"info/global_step": 156, "train_info/time_within_train_step": 27.76550006866455, "step": 156} +{"train_info/time_between_train_steps": 0.0054509639739990234, "step": 156} +{"info/global_step": 157, "train_info/time_within_train_step": 27.84783363342285, "step": 157} +{"train_info/time_between_train_steps": 0.005662202835083008, "step": 157} +{"info/global_step": 158, "train_info/time_within_train_step": 27.80536937713623, "step": 158} +{"train_info/time_between_train_steps": 0.005517244338989258, "step": 158} +{"info/global_step": 159, "train_info/time_within_train_step": 27.865103006362915, "step": 159} +{"train_info/time_between_train_steps": 0.006518125534057617, "step": 159} +{"info/global_step": 160, "train_info/time_within_train_step": 27.756556510925293, "step": 160} +{"train_info/time_between_train_steps": 0.005571842193603516, "step": 160} +{"info/global_step": 161, "train_info/time_within_train_step": 27.869873762130737, "step": 161} +{"train_info/time_between_train_steps": 0.005970954895019531, "step": 161} +{"info/global_step": 162, "train_info/time_within_train_step": 27.73230814933777, "step": 162} +{"train_info/time_between_train_steps": 0.010831594467163086, "step": 162} +{"info/global_step": 163, "train_info/time_within_train_step": 27.737372398376465, "step": 163} +{"train_info/time_between_train_steps": 0.01065516471862793, "step": 163} +{"info/global_step": 164, "train_info/time_within_train_step": 27.77128767967224, "step": 164} +{"train_info/time_between_train_steps": 0.0057713985443115234, "step": 164} +{"info/global_step": 165, "train_info/time_within_train_step": 27.73548150062561, "step": 165} +{"train_info/time_between_train_steps": 0.005536317825317383, "step": 165} +{"info/global_step": 166, "train_info/time_within_train_step": 27.849610805511475, "step": 166} +{"train_info/time_between_train_steps": 0.005501508712768555, "step": 166} +{"info/global_step": 167, "train_info/time_within_train_step": 27.73882031440735, "step": 167} +{"train_info/time_between_train_steps": 0.005667209625244141, "step": 167} +{"info/global_step": 168, "train_info/time_within_train_step": 27.8102924823761, "step": 168} +{"train_info/time_between_train_steps": 0.0054931640625, "step": 168} +{"info/global_step": 169, "train_info/time_within_train_step": 27.82247757911682, "step": 169} +{"train_info/time_between_train_steps": 0.005562782287597656, "step": 169} +{"info/global_step": 170, "train_info/time_within_train_step": 28.026168823242188, "step": 170} +{"train_info/time_between_train_steps": 0.006173610687255859, "step": 170} +{"info/global_step": 171, "train_info/time_within_train_step": 27.767661809921265, "step": 171} +{"train_info/time_between_train_steps": 0.0056383609771728516, "step": 171} +{"info/global_step": 172, "train_info/time_within_train_step": 27.865869522094727, "step": 172} +{"train_info/time_between_train_steps": 0.00619196891784668, "step": 172} +{"info/global_step": 173, "train_info/time_within_train_step": 27.771958112716675, "step": 173} +{"train_info/time_between_train_steps": 0.005835294723510742, "step": 173} +{"info/global_step": 174, "train_info/time_within_train_step": 27.868709564208984, "step": 174} +{"train_info/time_between_train_steps": 0.006165981292724609, "step": 174} +{"info/global_step": 175, "train_info/time_within_train_step": 27.79560899734497, "step": 175} +{"train_info/time_between_train_steps": 0.006281614303588867, "step": 175} +{"train_info/time_between_train_steps": 13.998578548431396, "step": 175} +{"info/global_step": 176, "train_info/time_within_train_step": 27.76457929611206, "step": 176} +{"train_info/time_between_train_steps": 0.0057697296142578125, "step": 176} +{"info/global_step": 177, "train_info/time_within_train_step": 27.94603133201599, "step": 177} +{"train_info/time_between_train_steps": 0.011185646057128906, "step": 177} +{"info/global_step": 178, "train_info/time_within_train_step": 27.763771295547485, "step": 178} +{"train_info/time_between_train_steps": 0.006119728088378906, "step": 178} +{"info/global_step": 179, "train_info/time_within_train_step": 27.92762589454651, "step": 179} +{"train_info/time_between_train_steps": 0.00582432746887207, "step": 179} +{"info/global_step": 180, "train_info/time_within_train_step": 27.747868537902832, "step": 180} +{"train_info/time_between_train_steps": 0.005699634552001953, "step": 180} +{"info/global_step": 181, "train_info/time_within_train_step": 27.754422903060913, "step": 181} +{"train_info/time_between_train_steps": 0.0053980350494384766, "step": 181} +{"info/global_step": 182, "train_info/time_within_train_step": 27.9800021648407, "step": 182} +{"train_info/time_between_train_steps": 0.010549545288085938, "step": 182} +{"info/global_step": 183, "train_info/time_within_train_step": 27.786980628967285, "step": 183} +{"train_info/time_between_train_steps": 0.005536794662475586, "step": 183} +{"info/global_step": 184, "train_info/time_within_train_step": 27.79291558265686, "step": 184} +{"train_info/time_between_train_steps": 0.005517482757568359, "step": 184} +{"info/global_step": 185, "train_info/time_within_train_step": 27.8377423286438, "step": 185} +{"train_info/time_between_train_steps": 0.005532503128051758, "step": 185} +{"info/global_step": 186, "train_info/time_within_train_step": 27.761621475219727, "step": 186} +{"train_info/time_between_train_steps": 0.0055577754974365234, "step": 186} +{"info/global_step": 187, "train_info/time_within_train_step": 27.778801441192627, "step": 187} +{"train_info/time_between_train_steps": 0.00564265251159668, "step": 187} +{"info/global_step": 188, "train_info/time_within_train_step": 27.740428686141968, "step": 188} +{"train_info/time_between_train_steps": 0.005425453186035156, "step": 188} +{"info/global_step": 189, "train_info/time_within_train_step": 27.756694555282593, "step": 189} +{"train_info/time_between_train_steps": 0.005403995513916016, "step": 189} +{"info/global_step": 190, "train_info/time_within_train_step": 27.745254516601562, "step": 190} +{"train_info/time_between_train_steps": 0.006479740142822266, "step": 190} +{"info/global_step": 191, "train_info/time_within_train_step": 27.82456088066101, "step": 191} +{"train_info/time_between_train_steps": 0.008222818374633789, "step": 191} +{"info/global_step": 192, "train_info/time_within_train_step": 27.802788257598877, "step": 192} +{"train_info/time_between_train_steps": 0.005892038345336914, "step": 192} +{"info/global_step": 193, "train_info/time_within_train_step": 27.8340322971344, "step": 193} +{"train_info/time_between_train_steps": 0.009296894073486328, "step": 193} +{"info/global_step": 194, "train_info/time_within_train_step": 27.747046947479248, "step": 194} +{"train_info/time_between_train_steps": 0.005632162094116211, "step": 194} +{"info/global_step": 195, "train_info/time_within_train_step": 27.880566596984863, "step": 195} +{"train_info/time_between_train_steps": 0.005719900131225586, "step": 195} +{"info/global_step": 196, "train_info/time_within_train_step": 27.780102252960205, "step": 196} +{"train_info/time_between_train_steps": 0.005591392517089844, "step": 196} +{"info/global_step": 197, "train_info/time_within_train_step": 27.806960105895996, "step": 197} +{"train_info/time_between_train_steps": 0.006167411804199219, "step": 197} +{"info/global_step": 198, "train_info/time_within_train_step": 27.829044818878174, "step": 198} +{"train_info/time_between_train_steps": 0.0057795047760009766, "step": 198} +{"info/global_step": 199, "train_info/time_within_train_step": 27.768452882766724, "step": 199} +{"train_info/time_between_train_steps": 0.005978822708129883, "step": 199} +{"info/global_step": 200, "train_info/time_within_train_step": 28.178645610809326, "step": 200} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736752666, "_runtime": 5826}, "step": 200} +{"logs": {"train/loss": 6.0975, "train/learning_rate": 0.0005555555555555556, "train/epoch": 7.02, "_timestamp": 1736752666, "_runtime": 5826}, "step": 200} +{"train_info/time_between_train_steps": 114.61879301071167, "step": 200} +{"train_info/time_between_train_steps": 128.20259928703308, "step": 200} +{"info/global_step": 201, "train_info/time_within_train_step": 27.8364315032959, "step": 201} +{"train_info/time_between_train_steps": 0.005782127380371094, "step": 201} +{"info/global_step": 202, "train_info/time_within_train_step": 27.925191640853882, "step": 202} +{"train_info/time_between_train_steps": 0.005963802337646484, "step": 202} +{"info/global_step": 203, "train_info/time_within_train_step": 27.778350114822388, "step": 203} +{"train_info/time_between_train_steps": 0.0056781768798828125, "step": 203} +{"info/global_step": 204, "train_info/time_within_train_step": 28.00202703475952, "step": 204} +{"train_info/time_between_train_steps": 0.005722761154174805, "step": 204} +{"info/global_step": 205, "train_info/time_within_train_step": 27.831275701522827, "step": 205} +{"train_info/time_between_train_steps": 0.0057260990142822266, "step": 205} +{"info/global_step": 206, "train_info/time_within_train_step": 27.762569665908813, "step": 206} +{"train_info/time_between_train_steps": 0.0054073333740234375, "step": 206} +{"info/global_step": 207, "train_info/time_within_train_step": 27.749411821365356, "step": 207} +{"train_info/time_between_train_steps": 0.005625009536743164, "step": 207} +{"info/global_step": 208, "train_info/time_within_train_step": 27.849749088287354, "step": 208} +{"train_info/time_between_train_steps": 0.010473966598510742, "step": 208} +{"info/global_step": 209, "train_info/time_within_train_step": 27.83044695854187, "step": 209} +{"train_info/time_between_train_steps": 0.0055620670318603516, "step": 209} +{"info/global_step": 210, "train_info/time_within_train_step": 27.78005838394165, "step": 210} +{"train_info/time_between_train_steps": 0.0055272579193115234, "step": 210} +{"info/global_step": 211, "train_info/time_within_train_step": 27.83190131187439, "step": 211} +{"train_info/time_between_train_steps": 0.005467891693115234, "step": 211} +{"info/global_step": 212, "train_info/time_within_train_step": 27.734710216522217, "step": 212} +{"train_info/time_between_train_steps": 0.0057375431060791016, "step": 212} +{"info/global_step": 213, "train_info/time_within_train_step": 27.891952753067017, "step": 213} +{"train_info/time_between_train_steps": 0.010632753372192383, "step": 213} +{"info/global_step": 214, "train_info/time_within_train_step": 27.76980996131897, "step": 214} +{"train_info/time_between_train_steps": 0.005547523498535156, "step": 214} +{"info/global_step": 215, "train_info/time_within_train_step": 27.904886960983276, "step": 215} +{"train_info/time_between_train_steps": 0.005613088607788086, "step": 215} +{"info/global_step": 216, "train_info/time_within_train_step": 27.82204246520996, "step": 216} +{"train_info/time_between_train_steps": 0.005507230758666992, "step": 216} +{"info/global_step": 217, "train_info/time_within_train_step": 27.892924070358276, "step": 217} +{"train_info/time_between_train_steps": 0.0056915283203125, "step": 217} +{"info/global_step": 218, "train_info/time_within_train_step": 27.759950399398804, "step": 218} +{"train_info/time_between_train_steps": 0.010118484497070312, "step": 218} +{"info/global_step": 219, "train_info/time_within_train_step": 27.81479048728943, "step": 219} +{"train_info/time_between_train_steps": 0.005589008331298828, "step": 219} +{"info/global_step": 220, "train_info/time_within_train_step": 27.770332098007202, "step": 220} +{"train_info/time_between_train_steps": 0.005757570266723633, "step": 220} +{"info/global_step": 221, "train_info/time_within_train_step": 27.864068269729614, "step": 221} +{"train_info/time_between_train_steps": 0.005937337875366211, "step": 221} +{"info/global_step": 222, "train_info/time_within_train_step": 27.812918186187744, "step": 222} +{"train_info/time_between_train_steps": 0.006111860275268555, "step": 222} +{"info/global_step": 223, "train_info/time_within_train_step": 27.84640407562256, "step": 223} +{"train_info/time_between_train_steps": 0.011037588119506836, "step": 223} +{"info/global_step": 224, "train_info/time_within_train_step": 28.02478003501892, "step": 224} +{"train_info/time_between_train_steps": 0.005861520767211914, "step": 224} +{"info/global_step": 225, "train_info/time_within_train_step": 27.788060426712036, "step": 225} +{"train_info/time_between_train_steps": 0.0066678524017333984, "step": 225} +{"train_info/time_between_train_steps": 13.752729892730713, "step": 225} +{"info/global_step": 226, "train_info/time_within_train_step": 27.737330436706543, "step": 226} +{"train_info/time_between_train_steps": 0.0055866241455078125, "step": 226} +{"info/global_step": 227, "train_info/time_within_train_step": 27.998595714569092, "step": 227} +{"train_info/time_between_train_steps": 0.005898237228393555, "step": 227} +{"info/global_step": 228, "train_info/time_within_train_step": 27.74864959716797, "step": 228} +{"train_info/time_between_train_steps": 0.005618572235107422, "step": 228} +{"info/global_step": 229, "train_info/time_within_train_step": 27.894333124160767, "step": 229} +{"train_info/time_between_train_steps": 0.006440877914428711, "step": 229} +{"info/global_step": 230, "train_info/time_within_train_step": 27.776122570037842, "step": 230} +{"train_info/time_between_train_steps": 0.005902290344238281, "step": 230} +{"info/global_step": 231, "train_info/time_within_train_step": 27.87747597694397, "step": 231} +{"train_info/time_between_train_steps": 0.005419731140136719, "step": 231} +{"info/global_step": 232, "train_info/time_within_train_step": 27.849711656570435, "step": 232} +{"train_info/time_between_train_steps": 0.0056917667388916016, "step": 232} +{"info/global_step": 233, "train_info/time_within_train_step": 27.744503498077393, "step": 233} +{"train_info/time_between_train_steps": 0.005507469177246094, "step": 233} +{"info/global_step": 234, "train_info/time_within_train_step": 27.864915370941162, "step": 234} +{"train_info/time_between_train_steps": 0.005489826202392578, "step": 234} +{"info/global_step": 235, "train_info/time_within_train_step": 27.814507722854614, "step": 235} +{"train_info/time_between_train_steps": 0.005597114562988281, "step": 235} +{"info/global_step": 236, "train_info/time_within_train_step": 27.870710134506226, "step": 236} +{"train_info/time_between_train_steps": 0.005679607391357422, "step": 236} +{"info/global_step": 237, "train_info/time_within_train_step": 27.75040912628174, "step": 237} +{"train_info/time_between_train_steps": 0.005563020706176758, "step": 237} +{"info/global_step": 238, "train_info/time_within_train_step": 27.852272272109985, "step": 238} +{"train_info/time_between_train_steps": 0.005464792251586914, "step": 238} +{"info/global_step": 239, "train_info/time_within_train_step": 27.774358987808228, "step": 239} +{"train_info/time_between_train_steps": 0.006453275680541992, "step": 239} +{"info/global_step": 240, "train_info/time_within_train_step": 27.94214630126953, "step": 240} +{"train_info/time_between_train_steps": 0.005588054656982422, "step": 240} +{"info/global_step": 241, "train_info/time_within_train_step": 27.73142981529236, "step": 241} +{"train_info/time_between_train_steps": 0.0055599212646484375, "step": 241} +{"info/global_step": 242, "train_info/time_within_train_step": 27.855191707611084, "step": 242} +{"train_info/time_between_train_steps": 0.01086115837097168, "step": 242} +{"info/global_step": 243, "train_info/time_within_train_step": 27.80825185775757, "step": 243} +{"train_info/time_between_train_steps": 0.010729551315307617, "step": 243} +{"info/global_step": 244, "train_info/time_within_train_step": 27.827094554901123, "step": 244} +{"train_info/time_between_train_steps": 0.0062100887298583984, "step": 244} +{"info/global_step": 245, "train_info/time_within_train_step": 27.796365976333618, "step": 245} +{"train_info/time_between_train_steps": 0.006071805953979492, "step": 245} +{"info/global_step": 246, "train_info/time_within_train_step": 27.759413957595825, "step": 246} +{"train_info/time_between_train_steps": 0.005760669708251953, "step": 246} +{"info/global_step": 247, "train_info/time_within_train_step": 27.95798683166504, "step": 247} +{"train_info/time_between_train_steps": 0.0065076351165771484, "step": 247} +{"info/global_step": 248, "train_info/time_within_train_step": 27.785863876342773, "step": 248} +{"train_info/time_between_train_steps": 0.005842685699462891, "step": 248} +{"info/global_step": 249, "train_info/time_within_train_step": 27.921176433563232, "step": 249} +{"train_info/time_between_train_steps": 0.006190776824951172, "step": 249} +{"info/global_step": 250, "train_info/time_within_train_step": 27.814133167266846, "step": 250} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736754202, "_runtime": 7362}, "step": 250} +{"logs": {"train/loss": 5.7317, "train/learning_rate": 0.0005277777777777777, "train/epoch": 9.02, "_timestamp": 1736754202, "_runtime": 7362}, "step": 250} +{"train_info/time_between_train_steps": 0.040680646896362305, "step": 250} +{"train_info/time_between_train_steps": 13.94639253616333, "step": 250} +{"info/global_step": 251, "train_info/time_within_train_step": 27.90821361541748, "step": 251} +{"train_info/time_between_train_steps": 0.005998849868774414, "step": 251} +{"info/global_step": 252, "train_info/time_within_train_step": 27.915794134140015, "step": 252} +{"train_info/time_between_train_steps": 0.005873918533325195, "step": 252} +{"info/global_step": 253, "train_info/time_within_train_step": 27.85282254219055, "step": 253} +{"train_info/time_between_train_steps": 0.005887269973754883, "step": 253} +{"info/global_step": 254, "train_info/time_within_train_step": 27.99890112876892, "step": 254} +{"train_info/time_between_train_steps": 0.006124019622802734, "step": 254} +{"info/global_step": 255, "train_info/time_within_train_step": 27.9377498626709, "step": 255} +{"train_info/time_between_train_steps": 0.011588096618652344, "step": 255} +{"info/global_step": 256, "train_info/time_within_train_step": 27.851239442825317, "step": 256} +{"train_info/time_between_train_steps": 0.005796670913696289, "step": 256} +{"info/global_step": 257, "train_info/time_within_train_step": 27.908868312835693, "step": 257} +{"train_info/time_between_train_steps": 0.0056536197662353516, "step": 257} +{"info/global_step": 258, "train_info/time_within_train_step": 27.77297616004944, "step": 258} +{"train_info/time_between_train_steps": 0.008786201477050781, "step": 258} +{"info/global_step": 259, "train_info/time_within_train_step": 27.86879014968872, "step": 259} +{"train_info/time_between_train_steps": 0.0056231021881103516, "step": 259} +{"info/global_step": 260, "train_info/time_within_train_step": 27.73678708076477, "step": 260} +{"train_info/time_between_train_steps": 0.005627870559692383, "step": 260} +{"info/global_step": 261, "train_info/time_within_train_step": 27.732244729995728, "step": 261} +{"train_info/time_between_train_steps": 0.005544900894165039, "step": 261} +{"info/global_step": 262, "train_info/time_within_train_step": 27.864673614501953, "step": 262} +{"train_info/time_between_train_steps": 0.0056116580963134766, "step": 262} +{"info/global_step": 263, "train_info/time_within_train_step": 27.824289083480835, "step": 263} +{"train_info/time_between_train_steps": 0.010766983032226562, "step": 263} +{"info/global_step": 264, "train_info/time_within_train_step": 27.870941162109375, "step": 264} +{"train_info/time_between_train_steps": 0.007336139678955078, "step": 264} +{"info/global_step": 265, "train_info/time_within_train_step": 27.759721517562866, "step": 265} +{"train_info/time_between_train_steps": 0.005784273147583008, "step": 265} +{"info/global_step": 266, "train_info/time_within_train_step": 27.853240728378296, "step": 266} +{"train_info/time_between_train_steps": 0.005757331848144531, "step": 266} +{"info/global_step": 267, "train_info/time_within_train_step": 27.755797147750854, "step": 267} +{"train_info/time_between_train_steps": 0.005766153335571289, "step": 267} +{"info/global_step": 268, "train_info/time_within_train_step": 27.8550226688385, "step": 268} +{"train_info/time_between_train_steps": 0.005702972412109375, "step": 268} +{"info/global_step": 269, "train_info/time_within_train_step": 27.767526388168335, "step": 269} +{"train_info/time_between_train_steps": 0.005771636962890625, "step": 269} +{"info/global_step": 270, "train_info/time_within_train_step": 27.88595175743103, "step": 270} +{"train_info/time_between_train_steps": 0.005959987640380859, "step": 270} +{"info/global_step": 271, "train_info/time_within_train_step": 27.8402316570282, "step": 271} +{"train_info/time_between_train_steps": 0.010719776153564453, "step": 271} +{"info/global_step": 272, "train_info/time_within_train_step": 27.91048526763916, "step": 272} +{"train_info/time_between_train_steps": 0.005942344665527344, "step": 272} +{"info/global_step": 273, "train_info/time_within_train_step": 27.781271934509277, "step": 273} +{"train_info/time_between_train_steps": 0.0058481693267822266, "step": 273} +{"info/global_step": 274, "train_info/time_within_train_step": 27.862377405166626, "step": 274} +{"train_info/time_between_train_steps": 0.006089925765991211, "step": 274} +{"info/global_step": 275, "train_info/time_within_train_step": 27.780593633651733, "step": 275} +{"train_info/time_between_train_steps": 0.006827116012573242, "step": 275} +{"train_info/time_between_train_steps": 13.66599702835083, "step": 275} +{"info/global_step": 276, "train_info/time_within_train_step": 27.858085870742798, "step": 276} +{"train_info/time_between_train_steps": 0.005615949630737305, "step": 276} +{"info/global_step": 277, "train_info/time_within_train_step": 27.927146196365356, "step": 277} +{"train_info/time_between_train_steps": 0.00577235221862793, "step": 277} +{"info/global_step": 278, "train_info/time_within_train_step": 28.30999255180359, "step": 278} +{"train_info/time_between_train_steps": 0.0056934356689453125, "step": 278} +{"info/global_step": 279, "train_info/time_within_train_step": 27.843761205673218, "step": 279} +{"train_info/time_between_train_steps": 0.005678653717041016, "step": 279} +{"info/global_step": 280, "train_info/time_within_train_step": 27.82840323448181, "step": 280} +{"train_info/time_between_train_steps": 0.005827426910400391, "step": 280} +{"info/global_step": 281, "train_info/time_within_train_step": 27.747920036315918, "step": 281} +{"train_info/time_between_train_steps": 0.005348682403564453, "step": 281} +{"info/global_step": 282, "train_info/time_within_train_step": 27.821778535842896, "step": 282} +{"train_info/time_between_train_steps": 0.005562305450439453, "step": 282} +{"info/global_step": 283, "train_info/time_within_train_step": 27.73584294319153, "step": 283} +{"train_info/time_between_train_steps": 0.005596637725830078, "step": 283} +{"info/global_step": 284, "train_info/time_within_train_step": 27.71971344947815, "step": 284} +{"train_info/time_between_train_steps": 0.005527019500732422, "step": 284} +{"info/global_step": 285, "train_info/time_within_train_step": 27.746837615966797, "step": 285} +{"train_info/time_between_train_steps": 0.006479501724243164, "step": 285} +{"info/global_step": 286, "train_info/time_within_train_step": 27.721906900405884, "step": 286} +{"train_info/time_between_train_steps": 0.0054399967193603516, "step": 286} +{"info/global_step": 287, "train_info/time_within_train_step": 27.771524906158447, "step": 287} +{"train_info/time_between_train_steps": 0.0055196285247802734, "step": 287} +{"info/global_step": 288, "train_info/time_within_train_step": 27.727747440338135, "step": 288} +{"train_info/time_between_train_steps": 0.010670661926269531, "step": 288} +{"info/global_step": 289, "train_info/time_within_train_step": 28.11862277984619, "step": 289} +{"train_info/time_between_train_steps": 0.005500316619873047, "step": 289} +{"info/global_step": 290, "train_info/time_within_train_step": 27.972575664520264, "step": 290} +{"train_info/time_between_train_steps": 0.0057179927825927734, "step": 290} +{"info/global_step": 291, "train_info/time_within_train_step": 27.760228395462036, "step": 291} +{"train_info/time_between_train_steps": 0.0055124759674072266, "step": 291} +{"info/global_step": 292, "train_info/time_within_train_step": 27.7887761592865, "step": 292} +{"train_info/time_between_train_steps": 0.005580425262451172, "step": 292} +{"info/global_step": 293, "train_info/time_within_train_step": 27.84902787208557, "step": 293} +{"train_info/time_between_train_steps": 0.00576019287109375, "step": 293} +{"info/global_step": 294, "train_info/time_within_train_step": 27.760342597961426, "step": 294} +{"train_info/time_between_train_steps": 0.0059545040130615234, "step": 294} +{"info/global_step": 295, "train_info/time_within_train_step": 27.85620903968811, "step": 295} +{"train_info/time_between_train_steps": 0.005686759948730469, "step": 295} +{"info/global_step": 296, "train_info/time_within_train_step": 27.743617296218872, "step": 296} +{"train_info/time_between_train_steps": 0.005802154541015625, "step": 296} +{"info/global_step": 297, "train_info/time_within_train_step": 27.77884268760681, "step": 297} +{"train_info/time_between_train_steps": 0.005915641784667969, "step": 297} +{"info/global_step": 298, "train_info/time_within_train_step": 27.861730337142944, "step": 298} +{"train_info/time_between_train_steps": 0.006853580474853516, "step": 298} +{"info/global_step": 299, "train_info/time_within_train_step": 27.769808292388916, "step": 299} +{"train_info/time_between_train_steps": 0.005947589874267578, "step": 299} +{"info/global_step": 300, "train_info/time_within_train_step": 28.22808861732483, "step": 300} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736755624, "_runtime": 8784}, "step": 300} +{"logs": {"train/loss": 5.4526, "train/learning_rate": 0.0005, "train/epoch": 11.02, "_timestamp": 1736755624, "_runtime": 8784}, "step": 300} +{"train_info/time_between_train_steps": 104.85542511940002, "step": 300} +{"train_info/time_between_train_steps": 118.6967933177948, "step": 300} +{"info/global_step": 301, "train_info/time_within_train_step": 27.759785413742065, "step": 301} +{"train_info/time_between_train_steps": 0.005484580993652344, "step": 301} +{"info/global_step": 302, "train_info/time_within_train_step": 28.020368576049805, "step": 302} +{"train_info/time_between_train_steps": 0.0057523250579833984, "step": 302} +{"info/global_step": 303, "train_info/time_within_train_step": 27.748501539230347, "step": 303} +{"train_info/time_between_train_steps": 0.006040811538696289, "step": 303} +{"info/global_step": 304, "train_info/time_within_train_step": 27.927999019622803, "step": 304} +{"train_info/time_between_train_steps": 0.0057184696197509766, "step": 304} +{"info/global_step": 305, "train_info/time_within_train_step": 27.87159013748169, "step": 305} +{"train_info/time_between_train_steps": 0.005904197692871094, "step": 305} +{"info/global_step": 306, "train_info/time_within_train_step": 27.825434923171997, "step": 306} +{"train_info/time_between_train_steps": 0.005777597427368164, "step": 306} +{"info/global_step": 307, "train_info/time_within_train_step": 27.760037183761597, "step": 307} +{"train_info/time_between_train_steps": 0.00583338737487793, "step": 307} +{"info/global_step": 308, "train_info/time_within_train_step": 27.892011404037476, "step": 308} +{"train_info/time_between_train_steps": 0.005624532699584961, "step": 308} +{"info/global_step": 309, "train_info/time_within_train_step": 27.869795322418213, "step": 309} +{"train_info/time_between_train_steps": 0.005749702453613281, "step": 309} +{"info/global_step": 310, "train_info/time_within_train_step": 27.75917935371399, "step": 310} +{"train_info/time_between_train_steps": 0.005659580230712891, "step": 310} +{"info/global_step": 311, "train_info/time_within_train_step": 27.840994834899902, "step": 311} +{"train_info/time_between_train_steps": 0.005866050720214844, "step": 311} +{"info/global_step": 312, "train_info/time_within_train_step": 27.772746562957764, "step": 312} +{"train_info/time_between_train_steps": 0.005507469177246094, "step": 312} +{"info/global_step": 313, "train_info/time_within_train_step": 27.87802219390869, "step": 313} +{"train_info/time_between_train_steps": 0.005620479583740234, "step": 313} +{"info/global_step": 314, "train_info/time_within_train_step": 27.771153926849365, "step": 314} +{"train_info/time_between_train_steps": 0.011009693145751953, "step": 314} +{"info/global_step": 315, "train_info/time_within_train_step": 27.89292550086975, "step": 315} +{"train_info/time_between_train_steps": 0.0056493282318115234, "step": 315} +{"info/global_step": 316, "train_info/time_within_train_step": 27.777199506759644, "step": 316} +{"train_info/time_between_train_steps": 0.005594968795776367, "step": 316} +{"info/global_step": 317, "train_info/time_within_train_step": 27.904991149902344, "step": 317} +{"train_info/time_between_train_steps": 0.005697011947631836, "step": 317} +{"info/global_step": 318, "train_info/time_within_train_step": 27.805131912231445, "step": 318} +{"train_info/time_between_train_steps": 0.005635261535644531, "step": 318} +{"info/global_step": 319, "train_info/time_within_train_step": 27.867639303207397, "step": 319} +{"train_info/time_between_train_steps": 0.00576472282409668, "step": 319} +{"info/global_step": 320, "train_info/time_within_train_step": 27.821465015411377, "step": 320} +{"train_info/time_between_train_steps": 0.006047964096069336, "step": 320} +{"info/global_step": 321, "train_info/time_within_train_step": 27.755616903305054, "step": 321} +{"train_info/time_between_train_steps": 0.00587010383605957, "step": 321} +{"info/global_step": 322, "train_info/time_within_train_step": 27.753971338272095, "step": 322} +{"train_info/time_between_train_steps": 0.006134510040283203, "step": 322} +{"info/global_step": 323, "train_info/time_within_train_step": 27.77558994293213, "step": 323} +{"train_info/time_between_train_steps": 0.006069660186767578, "step": 323} +{"info/global_step": 324, "train_info/time_within_train_step": 27.915106773376465, "step": 324} +{"train_info/time_between_train_steps": 0.006239652633666992, "step": 324} +{"info/global_step": 325, "train_info/time_within_train_step": 27.772446870803833, "step": 325} +{"train_info/time_between_train_steps": 0.006491422653198242, "step": 325} +{"train_info/time_between_train_steps": 14.014498472213745, "step": 325} +{"info/global_step": 326, "train_info/time_within_train_step": 27.74590253829956, "step": 326} +{"train_info/time_between_train_steps": 0.005969524383544922, "step": 326} +{"info/global_step": 327, "train_info/time_within_train_step": 27.94458031654358, "step": 327} +{"train_info/time_between_train_steps": 0.005774736404418945, "step": 327} +{"info/global_step": 328, "train_info/time_within_train_step": 27.78687882423401, "step": 328} +{"train_info/time_between_train_steps": 0.0058116912841796875, "step": 328} +{"info/global_step": 329, "train_info/time_within_train_step": 27.856470823287964, "step": 329} +{"train_info/time_between_train_steps": 0.0056400299072265625, "step": 329} +{"info/global_step": 330, "train_info/time_within_train_step": 27.788063526153564, "step": 330} +{"train_info/time_between_train_steps": 0.005833625793457031, "step": 330} +{"info/global_step": 331, "train_info/time_within_train_step": 27.755180597305298, "step": 331} +{"train_info/time_between_train_steps": 0.0057523250579833984, "step": 331} +{"info/global_step": 332, "train_info/time_within_train_step": 27.778190851211548, "step": 332} +{"train_info/time_between_train_steps": 0.0054378509521484375, "step": 332} +{"info/global_step": 333, "train_info/time_within_train_step": 27.719889640808105, "step": 333} +{"train_info/time_between_train_steps": 0.00554203987121582, "step": 333} +{"info/global_step": 334, "train_info/time_within_train_step": 27.838601112365723, "step": 334} +{"train_info/time_between_train_steps": 0.005656242370605469, "step": 334} +{"info/global_step": 335, "train_info/time_within_train_step": 27.749804735183716, "step": 335} +{"train_info/time_between_train_steps": 0.00563812255859375, "step": 335} +{"info/global_step": 336, "train_info/time_within_train_step": 27.80450439453125, "step": 336} +{"train_info/time_between_train_steps": 0.005463838577270508, "step": 336} +{"info/global_step": 337, "train_info/time_within_train_step": 27.760758638381958, "step": 337} +{"train_info/time_between_train_steps": 0.011183023452758789, "step": 337} +{"info/global_step": 338, "train_info/time_within_train_step": 27.96787142753601, "step": 338} +{"train_info/time_between_train_steps": 0.010629653930664062, "step": 338} +{"info/global_step": 339, "train_info/time_within_train_step": 27.862818479537964, "step": 339} +{"train_info/time_between_train_steps": 0.005630016326904297, "step": 339} +{"info/global_step": 340, "train_info/time_within_train_step": 27.880439043045044, "step": 340} +{"train_info/time_between_train_steps": 0.009151697158813477, "step": 340} +{"info/global_step": 341, "train_info/time_within_train_step": 27.74108362197876, "step": 341} +{"train_info/time_between_train_steps": 0.005712747573852539, "step": 341} +{"info/global_step": 342, "train_info/time_within_train_step": 27.737579107284546, "step": 342} +{"train_info/time_between_train_steps": 0.005723237991333008, "step": 342} +{"info/global_step": 343, "train_info/time_within_train_step": 27.888996839523315, "step": 343} +{"train_info/time_between_train_steps": 0.005724906921386719, "step": 343} +{"info/global_step": 344, "train_info/time_within_train_step": 27.900126695632935, "step": 344} +{"train_info/time_between_train_steps": 0.00576472282409668, "step": 344} +{"info/global_step": 345, "train_info/time_within_train_step": 27.77991485595703, "step": 345} +{"train_info/time_between_train_steps": 0.005737781524658203, "step": 345} +{"info/global_step": 346, "train_info/time_within_train_step": 27.755058526992798, "step": 346} +{"train_info/time_between_train_steps": 0.005833625793457031, "step": 346} +{"info/global_step": 347, "train_info/time_within_train_step": 27.87140202522278, "step": 347} +{"train_info/time_between_train_steps": 0.006145477294921875, "step": 347} +{"info/global_step": 348, "train_info/time_within_train_step": 27.76071000099182, "step": 348} +{"train_info/time_between_train_steps": 0.009606361389160156, "step": 348} +{"info/global_step": 349, "train_info/time_within_train_step": 27.907763481140137, "step": 349} +{"train_info/time_between_train_steps": 0.006819486618041992, "step": 349} +{"info/global_step": 350, "train_info/time_within_train_step": 27.802805185317993, "step": 350} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736757149, "_runtime": 10309}, "step": 350} +{"logs": {"train/loss": 5.246, "train/learning_rate": 0.00047222222222222224, "train/epoch": 13.02, "_timestamp": 1736757149, "_runtime": 10309}, "step": 350} +{"train_info/time_between_train_steps": 0.03581690788269043, "step": 350} +{"train_info/time_between_train_steps": 13.672938346862793, "step": 350} +{"info/global_step": 351, "train_info/time_within_train_step": 27.829534769058228, "step": 351} +{"train_info/time_between_train_steps": 0.008732318878173828, "step": 351} +{"info/global_step": 352, "train_info/time_within_train_step": 27.902671813964844, "step": 352} +{"train_info/time_between_train_steps": 0.0057010650634765625, "step": 352} +{"info/global_step": 353, "train_info/time_within_train_step": 27.95486092567444, "step": 353} +{"train_info/time_between_train_steps": 0.005754947662353516, "step": 353} +{"info/global_step": 354, "train_info/time_within_train_step": 27.865506649017334, "step": 354} +{"train_info/time_between_train_steps": 0.005700826644897461, "step": 354} +{"info/global_step": 355, "train_info/time_within_train_step": 28.020155906677246, "step": 355} +{"train_info/time_between_train_steps": 0.005961894989013672, "step": 355} +{"info/global_step": 356, "train_info/time_within_train_step": 27.78415870666504, "step": 356} +{"train_info/time_between_train_steps": 0.005504131317138672, "step": 356} +{"info/global_step": 357, "train_info/time_within_train_step": 27.884090185165405, "step": 357} +{"train_info/time_between_train_steps": 0.010553359985351562, "step": 357} +{"info/global_step": 358, "train_info/time_within_train_step": 27.763609886169434, "step": 358} +{"train_info/time_between_train_steps": 0.0056803226470947266, "step": 358} +{"info/global_step": 359, "train_info/time_within_train_step": 27.865297317504883, "step": 359} +{"train_info/time_between_train_steps": 0.005615711212158203, "step": 359} +{"info/global_step": 360, "train_info/time_within_train_step": 27.761242866516113, "step": 360} +{"train_info/time_between_train_steps": 0.011067628860473633, "step": 360} +{"info/global_step": 361, "train_info/time_within_train_step": 27.761849403381348, "step": 361} +{"train_info/time_between_train_steps": 0.010827064514160156, "step": 361} +{"info/global_step": 362, "train_info/time_within_train_step": 27.798141479492188, "step": 362} +{"train_info/time_between_train_steps": 0.0055239200592041016, "step": 362} +{"info/global_step": 363, "train_info/time_within_train_step": 27.78304696083069, "step": 363} +{"train_info/time_between_train_steps": 0.011049985885620117, "step": 363} +{"info/global_step": 364, "train_info/time_within_train_step": 27.876513957977295, "step": 364} +{"train_info/time_between_train_steps": 0.005495309829711914, "step": 364} +{"info/global_step": 365, "train_info/time_within_train_step": 27.7836012840271, "step": 365} +{"train_info/time_between_train_steps": 0.00574040412902832, "step": 365} +{"info/global_step": 366, "train_info/time_within_train_step": 27.997051000595093, "step": 366} +{"train_info/time_between_train_steps": 0.005559682846069336, "step": 366} +{"info/global_step": 367, "train_info/time_within_train_step": 27.790851354599, "step": 367} +{"train_info/time_between_train_steps": 0.005453824996948242, "step": 367} +{"info/global_step": 368, "train_info/time_within_train_step": 27.850534677505493, "step": 368} +{"train_info/time_between_train_steps": 0.005724668502807617, "step": 368} +{"info/global_step": 369, "train_info/time_within_train_step": 27.744012355804443, "step": 369} +{"train_info/time_between_train_steps": 0.005672931671142578, "step": 369} +{"info/global_step": 370, "train_info/time_within_train_step": 27.94367742538452, "step": 370} +{"train_info/time_between_train_steps": 0.0059053897857666016, "step": 370} +{"info/global_step": 371, "train_info/time_within_train_step": 27.736884355545044, "step": 371} +{"train_info/time_between_train_steps": 0.005629777908325195, "step": 371} +{"info/global_step": 372, "train_info/time_within_train_step": 27.870585441589355, "step": 372} +{"train_info/time_between_train_steps": 0.006106853485107422, "step": 372} +{"info/global_step": 373, "train_info/time_within_train_step": 27.77238440513611, "step": 373} +{"train_info/time_between_train_steps": 0.006996631622314453, "step": 373} +{"info/global_step": 374, "train_info/time_within_train_step": 27.81383490562439, "step": 374} +{"train_info/time_between_train_steps": 0.0058214664459228516, "step": 374} +{"info/global_step": 375, "train_info/time_within_train_step": 27.806660413742065, "step": 375} +{"train_info/time_between_train_steps": 0.006384134292602539, "step": 375} +{"train_info/time_between_train_steps": 13.720146179199219, "step": 375} +{"info/global_step": 376, "train_info/time_within_train_step": 27.822575330734253, "step": 376} +{"train_info/time_between_train_steps": 0.01074075698852539, "step": 376} +{"info/global_step": 377, "train_info/time_within_train_step": 27.894890785217285, "step": 377} +{"train_info/time_between_train_steps": 0.005674123764038086, "step": 377} +{"info/global_step": 378, "train_info/time_within_train_step": 27.890782117843628, "step": 378} +{"train_info/time_between_train_steps": 0.005715847015380859, "step": 378} +{"info/global_step": 379, "train_info/time_within_train_step": 27.861259698867798, "step": 379} +{"train_info/time_between_train_steps": 0.00585484504699707, "step": 379} +{"info/global_step": 380, "train_info/time_within_train_step": 27.846787452697754, "step": 380} +{"train_info/time_between_train_steps": 0.0058591365814208984, "step": 380} +{"info/global_step": 381, "train_info/time_within_train_step": 27.77725648880005, "step": 381} +{"train_info/time_between_train_steps": 0.005410909652709961, "step": 381} +{"info/global_step": 382, "train_info/time_within_train_step": 27.73593258857727, "step": 382} +{"train_info/time_between_train_steps": 0.005494594573974609, "step": 382} +{"info/global_step": 383, "train_info/time_within_train_step": 27.784322500228882, "step": 383} +{"train_info/time_between_train_steps": 0.005550384521484375, "step": 383} +{"info/global_step": 384, "train_info/time_within_train_step": 27.789801120758057, "step": 384} +{"train_info/time_between_train_steps": 0.005553483963012695, "step": 384} +{"info/global_step": 385, "train_info/time_within_train_step": 27.95819926261902, "step": 385} +{"train_info/time_between_train_steps": 0.005753755569458008, "step": 385} +{"info/global_step": 386, "train_info/time_within_train_step": 27.775228261947632, "step": 386} +{"train_info/time_between_train_steps": 0.005646467208862305, "step": 386} +{"info/global_step": 387, "train_info/time_within_train_step": 27.899457216262817, "step": 387} +{"train_info/time_between_train_steps": 0.005520820617675781, "step": 387} +{"info/global_step": 388, "train_info/time_within_train_step": 27.77800416946411, "step": 388} +{"train_info/time_between_train_steps": 0.005672454833984375, "step": 388} +{"info/global_step": 389, "train_info/time_within_train_step": 27.9050350189209, "step": 389} +{"train_info/time_between_train_steps": 0.005547285079956055, "step": 389} +{"info/global_step": 390, "train_info/time_within_train_step": 27.786399126052856, "step": 390} +{"train_info/time_between_train_steps": 0.006570100784301758, "step": 390} +{"info/global_step": 391, "train_info/time_within_train_step": 27.888431787490845, "step": 391} +{"train_info/time_between_train_steps": 0.0056247711181640625, "step": 391} +{"info/global_step": 392, "train_info/time_within_train_step": 27.76745295524597, "step": 392} +{"train_info/time_between_train_steps": 0.00647425651550293, "step": 392} +{"info/global_step": 393, "train_info/time_within_train_step": 27.89164972305298, "step": 393} +{"train_info/time_between_train_steps": 0.008749246597290039, "step": 393} +{"info/global_step": 394, "train_info/time_within_train_step": 27.784865379333496, "step": 394} +{"train_info/time_between_train_steps": 0.005658864974975586, "step": 394} +{"info/global_step": 395, "train_info/time_within_train_step": 27.81144952774048, "step": 395} +{"train_info/time_between_train_steps": 0.006025075912475586, "step": 395} +{"info/global_step": 396, "train_info/time_within_train_step": 27.903204441070557, "step": 396} +{"train_info/time_between_train_steps": 0.005610942840576172, "step": 396} +{"info/global_step": 397, "train_info/time_within_train_step": 27.757136821746826, "step": 397} +{"train_info/time_between_train_steps": 0.0059015750885009766, "step": 397} +{"info/global_step": 398, "train_info/time_within_train_step": 27.911821603775024, "step": 398} +{"train_info/time_between_train_steps": 0.005837678909301758, "step": 398} +{"info/global_step": 399, "train_info/time_within_train_step": 27.761824131011963, "step": 399} +{"train_info/time_between_train_steps": 0.005975246429443359, "step": 399} +{"info/global_step": 400, "train_info/time_within_train_step": 32.74362325668335, "step": 400} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736758577, "_runtime": 11737}, "step": 400} +{"logs": {"train/loss": 5.0966, "train/learning_rate": 0.00044444444444444436, "train/epoch": 15.02, "_timestamp": 1736758577, "_runtime": 11737}, "step": 400} +{"train_info/time_between_train_steps": 146.34073567390442, "step": 400} +{"train_info/time_between_train_steps": 160.3770453929901, "step": 400} +{"info/global_step": 401, "train_info/time_within_train_step": 27.930673599243164, "step": 401} +{"train_info/time_between_train_steps": 0.008297443389892578, "step": 401} +{"info/global_step": 402, "train_info/time_within_train_step": 27.93900752067566, "step": 402} +{"train_info/time_between_train_steps": 0.007385969161987305, "step": 402} +{"info/global_step": 403, "train_info/time_within_train_step": 27.859297513961792, "step": 403} +{"train_info/time_between_train_steps": 0.005764007568359375, "step": 403} +{"info/global_step": 404, "train_info/time_within_train_step": 27.92310070991516, "step": 404} +{"train_info/time_between_train_steps": 0.01096796989440918, "step": 404} +{"info/global_step": 405, "train_info/time_within_train_step": 27.787359952926636, "step": 405} +{"train_info/time_between_train_steps": 0.006888389587402344, "step": 405} +{"info/global_step": 406, "train_info/time_within_train_step": 27.798351526260376, "step": 406} +{"train_info/time_between_train_steps": 0.005385875701904297, "step": 406} +{"info/global_step": 407, "train_info/time_within_train_step": 27.776221990585327, "step": 407} +{"train_info/time_between_train_steps": 0.005511760711669922, "step": 407} +{"info/global_step": 408, "train_info/time_within_train_step": 27.824679374694824, "step": 408} +{"train_info/time_between_train_steps": 0.010677337646484375, "step": 408} +{"info/global_step": 409, "train_info/time_within_train_step": 27.731338262557983, "step": 409} +{"train_info/time_between_train_steps": 0.0054779052734375, "step": 409} +{"info/global_step": 410, "train_info/time_within_train_step": 27.801964282989502, "step": 410} +{"train_info/time_between_train_steps": 0.005570411682128906, "step": 410} +{"info/global_step": 411, "train_info/time_within_train_step": 27.780779361724854, "step": 411} +{"train_info/time_between_train_steps": 0.007804155349731445, "step": 411} +{"info/global_step": 412, "train_info/time_within_train_step": 27.875684022903442, "step": 412} +{"train_info/time_between_train_steps": 0.005880832672119141, "step": 412} +{"info/global_step": 413, "train_info/time_within_train_step": 27.790563821792603, "step": 413} +{"train_info/time_between_train_steps": 0.0056684017181396484, "step": 413} +{"info/global_step": 414, "train_info/time_within_train_step": 27.874552726745605, "step": 414} +{"train_info/time_between_train_steps": 0.005554676055908203, "step": 414} +{"info/global_step": 415, "train_info/time_within_train_step": 27.764220714569092, "step": 415} +{"train_info/time_between_train_steps": 0.00570225715637207, "step": 415} +{"info/global_step": 416, "train_info/time_within_train_step": 28.02013611793518, "step": 416} +{"train_info/time_between_train_steps": 0.006060600280761719, "step": 416} +{"info/global_step": 417, "train_info/time_within_train_step": 27.785148859024048, "step": 417} +{"train_info/time_between_train_steps": 0.00558924674987793, "step": 417} +{"info/global_step": 418, "train_info/time_within_train_step": 27.77422261238098, "step": 418} +{"train_info/time_between_train_steps": 0.006064176559448242, "step": 418} +{"info/global_step": 419, "train_info/time_within_train_step": 27.803897619247437, "step": 419} +{"train_info/time_between_train_steps": 0.005680084228515625, "step": 419} +{"info/global_step": 420, "train_info/time_within_train_step": 27.846096992492676, "step": 420} +{"train_info/time_between_train_steps": 0.00583195686340332, "step": 420} +{"info/global_step": 421, "train_info/time_within_train_step": 27.75703239440918, "step": 421} +{"train_info/time_between_train_steps": 0.005659818649291992, "step": 421} +{"info/global_step": 422, "train_info/time_within_train_step": 27.74138331413269, "step": 422} +{"train_info/time_between_train_steps": 0.0058062076568603516, "step": 422} +{"info/global_step": 423, "train_info/time_within_train_step": 27.850412368774414, "step": 423} +{"train_info/time_between_train_steps": 0.0058135986328125, "step": 423} +{"info/global_step": 424, "train_info/time_within_train_step": 27.750575065612793, "step": 424} +{"train_info/time_between_train_steps": 0.0059814453125, "step": 424} +{"info/global_step": 425, "train_info/time_within_train_step": 27.89055633544922, "step": 425} +{"train_info/time_between_train_steps": 0.00632166862487793, "step": 425} +{"train_info/time_between_train_steps": 14.022771835327148, "step": 425} +{"info/global_step": 426, "train_info/time_within_train_step": 27.76736354827881, "step": 426} +{"train_info/time_between_train_steps": 0.005336284637451172, "step": 426} +{"info/global_step": 427, "train_info/time_within_train_step": 28.071841955184937, "step": 427} +{"train_info/time_between_train_steps": 0.007115602493286133, "step": 427} +{"info/global_step": 428, "train_info/time_within_train_step": 27.814699411392212, "step": 428} +{"train_info/time_between_train_steps": 0.006274700164794922, "step": 428} +{"info/global_step": 429, "train_info/time_within_train_step": 28.103232383728027, "step": 429} +{"train_info/time_between_train_steps": 0.006810188293457031, "step": 429} +{"info/global_step": 430, "train_info/time_within_train_step": 27.776268243789673, "step": 430} +{"train_info/time_between_train_steps": 0.006524085998535156, "step": 430} +{"info/global_step": 431, "train_info/time_within_train_step": 27.841662883758545, "step": 431} +{"train_info/time_between_train_steps": 0.01114511489868164, "step": 431} +{"info/global_step": 432, "train_info/time_within_train_step": 27.956877946853638, "step": 432} +{"train_info/time_between_train_steps": 0.0057525634765625, "step": 432} +{"info/global_step": 433, "train_info/time_within_train_step": 27.794960260391235, "step": 433} +{"train_info/time_between_train_steps": 0.005731105804443359, "step": 433} +{"info/global_step": 434, "train_info/time_within_train_step": 27.743290424346924, "step": 434} +{"train_info/time_between_train_steps": 0.0057811737060546875, "step": 434} +{"info/global_step": 435, "train_info/time_within_train_step": 27.82430100440979, "step": 435} +{"train_info/time_between_train_steps": 0.005964994430541992, "step": 435} +{"info/global_step": 436, "train_info/time_within_train_step": 27.770343542099, "step": 436} +{"train_info/time_between_train_steps": 0.005826711654663086, "step": 436} +{"info/global_step": 437, "train_info/time_within_train_step": 27.913715839385986, "step": 437} +{"train_info/time_between_train_steps": 0.011034727096557617, "step": 437} +{"info/global_step": 438, "train_info/time_within_train_step": 27.772535800933838, "step": 438} +{"train_info/time_between_train_steps": 0.005613565444946289, "step": 438} +{"info/global_step": 439, "train_info/time_within_train_step": 27.833475351333618, "step": 439} +{"train_info/time_between_train_steps": 0.005671262741088867, "step": 439} +{"info/global_step": 440, "train_info/time_within_train_step": 27.78352928161621, "step": 440} +{"train_info/time_between_train_steps": 0.010058879852294922, "step": 440} +{"info/global_step": 441, "train_info/time_within_train_step": 27.77319574356079, "step": 441} +{"train_info/time_between_train_steps": 0.009563207626342773, "step": 441} +{"info/global_step": 442, "train_info/time_within_train_step": 27.824572801589966, "step": 442} +{"train_info/time_between_train_steps": 0.0057239532470703125, "step": 442} +{"info/global_step": 443, "train_info/time_within_train_step": 27.742985486984253, "step": 443} +{"train_info/time_between_train_steps": 0.005723237991333008, "step": 443} +{"info/global_step": 444, "train_info/time_within_train_step": 27.849098682403564, "step": 444} +{"train_info/time_between_train_steps": 0.0058782100677490234, "step": 444} +{"info/global_step": 445, "train_info/time_within_train_step": 27.774009227752686, "step": 445} +{"train_info/time_between_train_steps": 0.00565648078918457, "step": 445} +{"info/global_step": 446, "train_info/time_within_train_step": 27.95772433280945, "step": 446} +{"train_info/time_between_train_steps": 0.01088571548461914, "step": 446} +{"info/global_step": 447, "train_info/time_within_train_step": 27.868340015411377, "step": 447} +{"train_info/time_between_train_steps": 0.006025791168212891, "step": 447} +{"info/global_step": 448, "train_info/time_within_train_step": 27.754610776901245, "step": 448} +{"train_info/time_between_train_steps": 0.005742311477661133, "step": 448} +{"info/global_step": 449, "train_info/time_within_train_step": 27.764692306518555, "step": 449} +{"train_info/time_between_train_steps": 0.006181240081787109, "step": 449} +{"info/global_step": 450, "train_info/time_within_train_step": 27.905702829360962, "step": 450} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736760145, "_runtime": 13305}, "step": 450} +{"logs": {"train/loss": 4.9523, "train/learning_rate": 0.00041666666666666664, "train/epoch": 17.02, "_timestamp": 1736760145, "_runtime": 13305}, "step": 450} +{"train_info/time_between_train_steps": 0.08137154579162598, "step": 450} +{"train_info/time_between_train_steps": 13.917407035827637, "step": 450} +{"info/global_step": 451, "train_info/time_within_train_step": 27.771644353866577, "step": 451} +{"train_info/time_between_train_steps": 0.005935192108154297, "step": 451} +{"info/global_step": 452, "train_info/time_within_train_step": 28.035470247268677, "step": 452} +{"train_info/time_between_train_steps": 0.0058329105377197266, "step": 452} +{"info/global_step": 453, "train_info/time_within_train_step": 27.775732278823853, "step": 453} +{"train_info/time_between_train_steps": 0.008930206298828125, "step": 453} +{"info/global_step": 454, "train_info/time_within_train_step": 27.955775022506714, "step": 454} +{"train_info/time_between_train_steps": 0.00585174560546875, "step": 454} +{"info/global_step": 455, "train_info/time_within_train_step": 27.80487871170044, "step": 455} +{"train_info/time_between_train_steps": 0.0057964324951171875, "step": 455} +{"info/global_step": 456, "train_info/time_within_train_step": 27.771671295166016, "step": 456} +{"train_info/time_between_train_steps": 0.005312442779541016, "step": 456} +{"info/global_step": 457, "train_info/time_within_train_step": 27.719039916992188, "step": 457} +{"train_info/time_between_train_steps": 0.005512237548828125, "step": 457} +{"info/global_step": 458, "train_info/time_within_train_step": 27.85925030708313, "step": 458} +{"train_info/time_between_train_steps": 0.0054590702056884766, "step": 458} +{"info/global_step": 459, "train_info/time_within_train_step": 27.74871325492859, "step": 459} +{"train_info/time_between_train_steps": 0.0057353973388671875, "step": 459} +{"info/global_step": 460, "train_info/time_within_train_step": 27.77297019958496, "step": 460} +{"train_info/time_between_train_steps": 0.005467653274536133, "step": 460} +{"info/global_step": 461, "train_info/time_within_train_step": 27.80920910835266, "step": 461} +{"train_info/time_between_train_steps": 0.005609035491943359, "step": 461} +{"info/global_step": 462, "train_info/time_within_train_step": 27.857674598693848, "step": 462} +{"train_info/time_between_train_steps": 0.005645036697387695, "step": 462} +{"info/global_step": 463, "train_info/time_within_train_step": 27.82403874397278, "step": 463} +{"train_info/time_between_train_steps": 0.005536317825317383, "step": 463} +{"info/global_step": 464, "train_info/time_within_train_step": 27.758688926696777, "step": 464} +{"train_info/time_between_train_steps": 0.010559558868408203, "step": 464} +{"info/global_step": 465, "train_info/time_within_train_step": 27.82383418083191, "step": 465} +{"train_info/time_between_train_steps": 0.0055789947509765625, "step": 465} +{"info/global_step": 466, "train_info/time_within_train_step": 27.808636903762817, "step": 466} +{"train_info/time_between_train_steps": 0.008398056030273438, "step": 466} +{"info/global_step": 467, "train_info/time_within_train_step": 27.894846200942993, "step": 467} +{"train_info/time_between_train_steps": 0.005574226379394531, "step": 467} +{"info/global_step": 468, "train_info/time_within_train_step": 27.739683151245117, "step": 468} +{"train_info/time_between_train_steps": 0.005764007568359375, "step": 468} +{"info/global_step": 469, "train_info/time_within_train_step": 27.852718114852905, "step": 469} +{"train_info/time_between_train_steps": 0.005814075469970703, "step": 469} +{"info/global_step": 470, "train_info/time_within_train_step": 27.729591131210327, "step": 470} +{"train_info/time_between_train_steps": 0.005690097808837891, "step": 470} +{"info/global_step": 471, "train_info/time_within_train_step": 27.91780924797058, "step": 471} +{"train_info/time_between_train_steps": 0.005694150924682617, "step": 471} +{"info/global_step": 472, "train_info/time_within_train_step": 27.80287766456604, "step": 472} +{"train_info/time_between_train_steps": 0.005873680114746094, "step": 472} +{"info/global_step": 473, "train_info/time_within_train_step": 27.8804669380188, "step": 473} +{"train_info/time_between_train_steps": 0.005713462829589844, "step": 473} +{"info/global_step": 474, "train_info/time_within_train_step": 27.771337270736694, "step": 474} +{"train_info/time_between_train_steps": 0.006093263626098633, "step": 474} +{"info/global_step": 475, "train_info/time_within_train_step": 27.790838956832886, "step": 475} +{"train_info/time_between_train_steps": 0.006242990493774414, "step": 475} +{"train_info/time_between_train_steps": 14.096456050872803, "step": 475} +{"info/global_step": 476, "train_info/time_within_train_step": 27.74424910545349, "step": 476} +{"train_info/time_between_train_steps": 0.005565166473388672, "step": 476} +{"info/global_step": 477, "train_info/time_within_train_step": 27.991215467453003, "step": 477} +{"train_info/time_between_train_steps": 0.005641460418701172, "step": 477} +{"info/global_step": 478, "train_info/time_within_train_step": 27.906535387039185, "step": 478} +{"train_info/time_between_train_steps": 0.00583958625793457, "step": 478} +{"info/global_step": 479, "train_info/time_within_train_step": 27.85263705253601, "step": 479} +{"train_info/time_between_train_steps": 0.007256507873535156, "step": 479} +{"info/global_step": 480, "train_info/time_within_train_step": 27.74119520187378, "step": 480} +{"train_info/time_between_train_steps": 0.008090496063232422, "step": 480} +{"info/global_step": 481, "train_info/time_within_train_step": 27.79653024673462, "step": 481} +{"train_info/time_between_train_steps": 0.00869297981262207, "step": 481} +{"info/global_step": 482, "train_info/time_within_train_step": 27.875027179718018, "step": 482} +{"train_info/time_between_train_steps": 0.005627632141113281, "step": 482} +{"info/global_step": 483, "train_info/time_within_train_step": 27.733816146850586, "step": 483} +{"train_info/time_between_train_steps": 0.006639242172241211, "step": 483} +{"info/global_step": 484, "train_info/time_within_train_step": 27.863032817840576, "step": 484} +{"train_info/time_between_train_steps": 0.005505800247192383, "step": 484} +{"info/global_step": 485, "train_info/time_within_train_step": 27.763465642929077, "step": 485} +{"train_info/time_between_train_steps": 0.009558677673339844, "step": 485} +{"info/global_step": 486, "train_info/time_within_train_step": 27.87151789665222, "step": 486} +{"train_info/time_between_train_steps": 0.008188247680664062, "step": 486} +{"info/global_step": 487, "train_info/time_within_train_step": 27.7383291721344, "step": 487} +{"train_info/time_between_train_steps": 0.010975837707519531, "step": 487} +{"info/global_step": 488, "train_info/time_within_train_step": 27.85940384864807, "step": 488} +{"train_info/time_between_train_steps": 0.00931549072265625, "step": 488} +{"info/global_step": 489, "train_info/time_within_train_step": 27.748836278915405, "step": 489} +{"train_info/time_between_train_steps": 0.005563259124755859, "step": 489} +{"info/global_step": 490, "train_info/time_within_train_step": 27.874491453170776, "step": 490} +{"train_info/time_between_train_steps": 0.0055370330810546875, "step": 490} +{"info/global_step": 491, "train_info/time_within_train_step": 27.74721598625183, "step": 491} +{"train_info/time_between_train_steps": 0.010536670684814453, "step": 491} +{"info/global_step": 492, "train_info/time_within_train_step": 27.940787315368652, "step": 492} +{"train_info/time_between_train_steps": 0.005513906478881836, "step": 492} +{"info/global_step": 493, "train_info/time_within_train_step": 27.881502866744995, "step": 493} +{"train_info/time_between_train_steps": 0.007837533950805664, "step": 493} +{"info/global_step": 494, "train_info/time_within_train_step": 27.78162693977356, "step": 494} +{"train_info/time_between_train_steps": 0.009488582611083984, "step": 494} +{"info/global_step": 495, "train_info/time_within_train_step": 27.800063610076904, "step": 495} +{"train_info/time_between_train_steps": 0.005667209625244141, "step": 495} +{"info/global_step": 496, "train_info/time_within_train_step": 27.768559217453003, "step": 496} +{"train_info/time_between_train_steps": 0.0058972835540771484, "step": 496} +{"info/global_step": 497, "train_info/time_within_train_step": 27.85468554496765, "step": 497} +{"train_info/time_between_train_steps": 0.0061032772064208984, "step": 497} +{"info/global_step": 498, "train_info/time_within_train_step": 27.85795259475708, "step": 498} +{"train_info/time_between_train_steps": 0.005774736404418945, "step": 498} +{"info/global_step": 499, "train_info/time_within_train_step": 27.87790298461914, "step": 499} +{"train_info/time_between_train_steps": 0.010905742645263672, "step": 499} +{"info/global_step": 500, "train_info/time_within_train_step": 28.822477102279663, "step": 500} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736761568, "_runtime": 14728}, "step": 500} +{"logs": {"train/loss": 4.7992, "train/learning_rate": 0.00038888888888888887, "train/epoch": 19.02, "_timestamp": 1736761568, "_runtime": 14728}, "step": 500} +{"train_info/time_between_train_steps": 191.93787097930908, "step": 500} +{"train_info/time_between_train_steps": 205.56485724449158, "step": 500} +{"info/global_step": 501, "train_info/time_within_train_step": 27.77135920524597, "step": 501} +{"train_info/time_between_train_steps": 0.005406856536865234, "step": 501} +{"info/global_step": 502, "train_info/time_within_train_step": 27.89014196395874, "step": 502} +{"train_info/time_between_train_steps": 0.005728244781494141, "step": 502} +{"info/global_step": 503, "train_info/time_within_train_step": 27.859212160110474, "step": 503} +{"train_info/time_between_train_steps": 0.005766868591308594, "step": 503} +{"info/global_step": 504, "train_info/time_within_train_step": 27.883710622787476, "step": 504} +{"train_info/time_between_train_steps": 0.00565028190612793, "step": 504} +{"info/global_step": 505, "train_info/time_within_train_step": 27.771097660064697, "step": 505} +{"train_info/time_between_train_steps": 0.005894184112548828, "step": 505} +{"info/global_step": 506, "train_info/time_within_train_step": 27.86460781097412, "step": 506} +{"train_info/time_between_train_steps": 0.0055196285247802734, "step": 506} +{"info/global_step": 507, "train_info/time_within_train_step": 27.757519006729126, "step": 507} +{"train_info/time_between_train_steps": 0.005695343017578125, "step": 507} +{"info/global_step": 508, "train_info/time_within_train_step": 27.86764121055603, "step": 508} +{"train_info/time_between_train_steps": 0.010492563247680664, "step": 508} +{"info/global_step": 509, "train_info/time_within_train_step": 27.899515628814697, "step": 509} +{"train_info/time_between_train_steps": 0.005602836608886719, "step": 509} +{"info/global_step": 510, "train_info/time_within_train_step": 27.895812034606934, "step": 510} +{"train_info/time_between_train_steps": 0.005593061447143555, "step": 510} +{"info/global_step": 511, "train_info/time_within_train_step": 27.720125675201416, "step": 511} +{"train_info/time_between_train_steps": 0.00549006462097168, "step": 511} +{"info/global_step": 512, "train_info/time_within_train_step": 27.88310217857361, "step": 512} +{"train_info/time_between_train_steps": 0.005568981170654297, "step": 512} +{"info/global_step": 513, "train_info/time_within_train_step": 27.757140636444092, "step": 513} +{"train_info/time_between_train_steps": 0.005719184875488281, "step": 513} +{"info/global_step": 514, "train_info/time_within_train_step": 27.853209257125854, "step": 514} +{"train_info/time_between_train_steps": 0.010556697845458984, "step": 514} +{"info/global_step": 515, "train_info/time_within_train_step": 27.79724144935608, "step": 515} +{"train_info/time_between_train_steps": 0.0056912899017333984, "step": 515} +{"info/global_step": 516, "train_info/time_within_train_step": 27.79615592956543, "step": 516} +{"train_info/time_between_train_steps": 0.0055942535400390625, "step": 516} +{"info/global_step": 517, "train_info/time_within_train_step": 27.73993420600891, "step": 517} +{"train_info/time_between_train_steps": 0.005591869354248047, "step": 517} +{"info/global_step": 518, "train_info/time_within_train_step": 27.74241852760315, "step": 518} +{"train_info/time_between_train_steps": 0.005723714828491211, "step": 518} +{"info/global_step": 519, "train_info/time_within_train_step": 27.875690698623657, "step": 519} +{"train_info/time_between_train_steps": 0.008096694946289062, "step": 519} +{"info/global_step": 520, "train_info/time_within_train_step": 27.856738805770874, "step": 520} +{"train_info/time_between_train_steps": 0.010666847229003906, "step": 520} +{"info/global_step": 521, "train_info/time_within_train_step": 27.909271955490112, "step": 521} +{"train_info/time_between_train_steps": 0.005600929260253906, "step": 521} +{"info/global_step": 522, "train_info/time_within_train_step": 27.74250316619873, "step": 522} +{"train_info/time_between_train_steps": 0.005782365798950195, "step": 522} +{"info/global_step": 523, "train_info/time_within_train_step": 27.93693733215332, "step": 523} +{"train_info/time_between_train_steps": 0.006006002426147461, "step": 523} +{"info/global_step": 524, "train_info/time_within_train_step": 27.86542248725891, "step": 524} +{"train_info/time_between_train_steps": 0.007943868637084961, "step": 524} +{"info/global_step": 525, "train_info/time_within_train_step": 27.94295883178711, "step": 525} +{"train_info/time_between_train_steps": 0.0063779354095458984, "step": 525} +{"train_info/time_between_train_steps": 13.783812999725342, "step": 525} +{"info/global_step": 526, "train_info/time_within_train_step": 27.77751350402832, "step": 526} +{"train_info/time_between_train_steps": 0.005461931228637695, "step": 526} +{"info/global_step": 527, "train_info/time_within_train_step": 28.07094383239746, "step": 527} +{"train_info/time_between_train_steps": 0.005792140960693359, "step": 527} +{"info/global_step": 528, "train_info/time_within_train_step": 27.76884627342224, "step": 528} +{"train_info/time_between_train_steps": 0.005696296691894531, "step": 528} +{"info/global_step": 529, "train_info/time_within_train_step": 27.92008876800537, "step": 529} +{"train_info/time_between_train_steps": 0.005856513977050781, "step": 529} +{"info/global_step": 530, "train_info/time_within_train_step": 27.76482367515564, "step": 530} +{"train_info/time_between_train_steps": 0.0057599544525146484, "step": 530} +{"info/global_step": 531, "train_info/time_within_train_step": 27.896644830703735, "step": 531} +{"train_info/time_between_train_steps": 0.005393028259277344, "step": 531} +{"info/global_step": 532, "train_info/time_within_train_step": 27.757063150405884, "step": 532} +{"train_info/time_between_train_steps": 0.005552530288696289, "step": 532} +{"info/global_step": 533, "train_info/time_within_train_step": 27.868833541870117, "step": 533} +{"train_info/time_between_train_steps": 0.005511045455932617, "step": 533} +{"info/global_step": 534, "train_info/time_within_train_step": 27.743918657302856, "step": 534} +{"train_info/time_between_train_steps": 0.00581669807434082, "step": 534} +{"info/global_step": 535, "train_info/time_within_train_step": 27.845094442367554, "step": 535} +{"train_info/time_between_train_steps": 0.005495309829711914, "step": 535} +{"info/global_step": 536, "train_info/time_within_train_step": 27.731733083724976, "step": 536} +{"train_info/time_between_train_steps": 0.0054781436920166016, "step": 536} +{"info/global_step": 537, "train_info/time_within_train_step": 27.769104957580566, "step": 537} +{"train_info/time_between_train_steps": 0.01050567626953125, "step": 537} +{"info/global_step": 538, "train_info/time_within_train_step": 27.815809726715088, "step": 538} +{"train_info/time_between_train_steps": 0.005487918853759766, "step": 538} +{"info/global_step": 539, "train_info/time_within_train_step": 27.817484378814697, "step": 539} +{"train_info/time_between_train_steps": 0.005643606185913086, "step": 539} +{"info/global_step": 540, "train_info/time_within_train_step": 27.85663080215454, "step": 540} +{"train_info/time_between_train_steps": 0.005713939666748047, "step": 540} +{"info/global_step": 541, "train_info/time_within_train_step": 27.732864141464233, "step": 541} +{"train_info/time_between_train_steps": 0.011210441589355469, "step": 541} +{"info/global_step": 542, "train_info/time_within_train_step": 27.895726203918457, "step": 542} +{"train_info/time_between_train_steps": 0.00820016860961914, "step": 542} +{"info/global_step": 543, "train_info/time_within_train_step": 27.746416091918945, "step": 543} +{"train_info/time_between_train_steps": 0.009363889694213867, "step": 543} +{"info/global_step": 544, "train_info/time_within_train_step": 27.90461492538452, "step": 544} +{"train_info/time_between_train_steps": 0.005858898162841797, "step": 544} +{"info/global_step": 545, "train_info/time_within_train_step": 27.76597571372986, "step": 545} +{"train_info/time_between_train_steps": 0.006100177764892578, "step": 545} +{"info/global_step": 546, "train_info/time_within_train_step": 27.910532474517822, "step": 546} +{"train_info/time_between_train_steps": 0.005838871002197266, "step": 546} +{"info/global_step": 547, "train_info/time_within_train_step": 27.79033064842224, "step": 547} +{"train_info/time_between_train_steps": 0.006039619445800781, "step": 547} +{"info/global_step": 548, "train_info/time_within_train_step": 27.902571201324463, "step": 548} +{"train_info/time_between_train_steps": 0.006013631820678711, "step": 548} +{"info/global_step": 549, "train_info/time_within_train_step": 27.81618094444275, "step": 549} +{"train_info/time_between_train_steps": 0.011327743530273438, "step": 549} +{"info/global_step": 550, "train_info/time_within_train_step": 27.868263959884644, "step": 550} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736763181, "_runtime": 16341}, "step": 550} +{"logs": {"train/loss": 4.6133, "train/learning_rate": 0.0003611111111111111, "train/epoch": 21.02, "_timestamp": 1736763181, "_runtime": 16341}, "step": 550} +{"train_info/time_between_train_steps": 0.06667041778564453, "step": 550} +{"train_info/time_between_train_steps": 14.430286407470703, "step": 550} +{"info/global_step": 551, "train_info/time_within_train_step": 27.732701778411865, "step": 551} +{"train_info/time_between_train_steps": 0.014655828475952148, "step": 551} +{"info/global_step": 552, "train_info/time_within_train_step": 28.038628339767456, "step": 552} +{"train_info/time_between_train_steps": 0.005874156951904297, "step": 552} +{"info/global_step": 553, "train_info/time_within_train_step": 27.785367488861084, "step": 553} +{"train_info/time_between_train_steps": 0.005720615386962891, "step": 553} +{"info/global_step": 554, "train_info/time_within_train_step": 28.05447483062744, "step": 554} +{"train_info/time_between_train_steps": 0.0057599544525146484, "step": 554} +{"info/global_step": 555, "train_info/time_within_train_step": 27.859017372131348, "step": 555} +{"train_info/time_between_train_steps": 0.005781412124633789, "step": 555} +{"info/global_step": 556, "train_info/time_within_train_step": 27.897017240524292, "step": 556} +{"train_info/time_between_train_steps": 0.005595207214355469, "step": 556} +{"info/global_step": 557, "train_info/time_within_train_step": 27.7424955368042, "step": 557} +{"train_info/time_between_train_steps": 0.005532979965209961, "step": 557} +{"info/global_step": 558, "train_info/time_within_train_step": 27.805485725402832, "step": 558} +{"train_info/time_between_train_steps": 0.005482673645019531, "step": 558} +{"info/global_step": 559, "train_info/time_within_train_step": 27.895864009857178, "step": 559} +{"train_info/time_between_train_steps": 0.005610942840576172, "step": 559} +{"info/global_step": 560, "train_info/time_within_train_step": 27.784388303756714, "step": 560} +{"train_info/time_between_train_steps": 0.005654811859130859, "step": 560} +{"info/global_step": 561, "train_info/time_within_train_step": 27.869431972503662, "step": 561} +{"train_info/time_between_train_steps": 0.005603313446044922, "step": 561} +{"info/global_step": 562, "train_info/time_within_train_step": 27.762627601623535, "step": 562} +{"train_info/time_between_train_steps": 0.0058536529541015625, "step": 562} +{"info/global_step": 563, "train_info/time_within_train_step": 27.90786910057068, "step": 563} +{"train_info/time_between_train_steps": 0.005515098571777344, "step": 563} +{"info/global_step": 564, "train_info/time_within_train_step": 27.72266173362732, "step": 564} +{"train_info/time_between_train_steps": 0.0054683685302734375, "step": 564} +{"info/global_step": 565, "train_info/time_within_train_step": 27.866668939590454, "step": 565} +{"train_info/time_between_train_steps": 0.005600929260253906, "step": 565} +{"info/global_step": 566, "train_info/time_within_train_step": 27.73502278327942, "step": 566} +{"train_info/time_between_train_steps": 0.005606174468994141, "step": 566} +{"info/global_step": 567, "train_info/time_within_train_step": 27.884066581726074, "step": 567} +{"train_info/time_between_train_steps": 0.005807399749755859, "step": 567} +{"info/global_step": 568, "train_info/time_within_train_step": 27.799230098724365, "step": 568} +{"train_info/time_between_train_steps": 0.005718708038330078, "step": 568} +{"info/global_step": 569, "train_info/time_within_train_step": 27.8250949382782, "step": 569} +{"train_info/time_between_train_steps": 0.005746603012084961, "step": 569} +{"info/global_step": 570, "train_info/time_within_train_step": 27.970821142196655, "step": 570} +{"train_info/time_between_train_steps": 0.005972385406494141, "step": 570} +{"info/global_step": 571, "train_info/time_within_train_step": 27.746448755264282, "step": 571} +{"train_info/time_between_train_steps": 0.005846977233886719, "step": 571} +{"info/global_step": 572, "train_info/time_within_train_step": 27.755637645721436, "step": 572} +{"train_info/time_between_train_steps": 0.005923748016357422, "step": 572} +{"info/global_step": 573, "train_info/time_within_train_step": 27.747612476348877, "step": 573} +{"train_info/time_between_train_steps": 0.006071567535400391, "step": 573} +{"info/global_step": 574, "train_info/time_within_train_step": 27.884243488311768, "step": 574} +{"train_info/time_between_train_steps": 0.006078958511352539, "step": 574} +{"info/global_step": 575, "train_info/time_within_train_step": 27.76457643508911, "step": 575} +{"train_info/time_between_train_steps": 0.006145000457763672, "step": 575} +{"train_info/time_between_train_steps": 13.667015075683594, "step": 575} +{"info/global_step": 576, "train_info/time_within_train_step": 27.797316312789917, "step": 576} +{"train_info/time_between_train_steps": 0.006359100341796875, "step": 576} +{"info/global_step": 577, "train_info/time_within_train_step": 27.962877988815308, "step": 577} +{"train_info/time_between_train_steps": 0.005978584289550781, "step": 577} +{"info/global_step": 578, "train_info/time_within_train_step": 27.799033164978027, "step": 578} +{"train_info/time_between_train_steps": 0.0062694549560546875, "step": 578} +{"info/global_step": 579, "train_info/time_within_train_step": 27.95710062980652, "step": 579} +{"train_info/time_between_train_steps": 0.009356498718261719, "step": 579} +{"info/global_step": 580, "train_info/time_within_train_step": 27.893244743347168, "step": 580} +{"train_info/time_between_train_steps": 0.006416797637939453, "step": 580} +{"info/global_step": 581, "train_info/time_within_train_step": 27.86959934234619, "step": 581} +{"train_info/time_between_train_steps": 0.006668806076049805, "step": 581} +{"info/global_step": 582, "train_info/time_within_train_step": 27.91560196876526, "step": 582} +{"train_info/time_between_train_steps": 0.00564265251159668, "step": 582} +{"info/global_step": 583, "train_info/time_within_train_step": 27.755150318145752, "step": 583} +{"train_info/time_between_train_steps": 0.005742549896240234, "step": 583} +{"info/global_step": 584, "train_info/time_within_train_step": 27.8840115070343, "step": 584} +{"train_info/time_between_train_steps": 0.009069681167602539, "step": 584} +{"info/global_step": 585, "train_info/time_within_train_step": 27.74118185043335, "step": 585} +{"train_info/time_between_train_steps": 0.005579948425292969, "step": 585} +{"info/global_step": 586, "train_info/time_within_train_step": 27.987963914871216, "step": 586} +{"train_info/time_between_train_steps": 0.010699748992919922, "step": 586} +{"info/global_step": 587, "train_info/time_within_train_step": 27.779085397720337, "step": 587} +{"train_info/time_between_train_steps": 0.005658864974975586, "step": 587} +{"info/global_step": 588, "train_info/time_within_train_step": 27.900956869125366, "step": 588} +{"train_info/time_between_train_steps": 0.0056841373443603516, "step": 588} +{"info/global_step": 589, "train_info/time_within_train_step": 27.87045121192932, "step": 589} +{"train_info/time_between_train_steps": 0.0055773258209228516, "step": 589} +{"info/global_step": 590, "train_info/time_within_train_step": 27.80570340156555, "step": 590} +{"train_info/time_between_train_steps": 0.0055429935455322266, "step": 590} +{"info/global_step": 591, "train_info/time_within_train_step": 27.835399389266968, "step": 591} +{"train_info/time_between_train_steps": 0.0057904720306396484, "step": 591} +{"info/global_step": 592, "train_info/time_within_train_step": 27.83171033859253, "step": 592} +{"train_info/time_between_train_steps": 0.0057065486907958984, "step": 592} +{"info/global_step": 593, "train_info/time_within_train_step": 27.884296655654907, "step": 593} +{"train_info/time_between_train_steps": 0.005936384201049805, "step": 593} +{"info/global_step": 594, "train_info/time_within_train_step": 27.77096462249756, "step": 594} +{"train_info/time_between_train_steps": 0.009840011596679688, "step": 594} +{"info/global_step": 595, "train_info/time_within_train_step": 27.94739079475403, "step": 595} +{"train_info/time_between_train_steps": 0.0059664249420166016, "step": 595} +{"info/global_step": 596, "train_info/time_within_train_step": 27.78070092201233, "step": 596} +{"train_info/time_between_train_steps": 0.0069315433502197266, "step": 596} +{"info/global_step": 597, "train_info/time_within_train_step": 27.91527247428894, "step": 597} +{"train_info/time_between_train_steps": 0.006036520004272461, "step": 597} +{"info/global_step": 598, "train_info/time_within_train_step": 27.781660556793213, "step": 598} +{"train_info/time_between_train_steps": 0.010393142700195312, "step": 598} +{"info/global_step": 599, "train_info/time_within_train_step": 27.89413833618164, "step": 599} +{"train_info/time_between_train_steps": 0.006036043167114258, "step": 599} +{"info/global_step": 600, "train_info/time_within_train_step": 27.767997980117798, "step": 600} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736764605, "_runtime": 17765}, "step": 600} +{"logs": {"train/loss": 4.438, "train/learning_rate": 0.0003333333333333333, "train/epoch": 23.02, "_timestamp": 1736764605, "_runtime": 17765}, "step": 600} +{"train_info/time_between_train_steps": 243.0378279685974, "step": 600} +{"train_info/time_between_train_steps": 256.7769718170166, "step": 600} +{"info/global_step": 601, "train_info/time_within_train_step": 27.862463235855103, "step": 601} +{"train_info/time_between_train_steps": 0.0077610015869140625, "step": 601} +{"info/global_step": 602, "train_info/time_within_train_step": 27.970770835876465, "step": 602} +{"train_info/time_between_train_steps": 0.0063250064849853516, "step": 602} +{"info/global_step": 603, "train_info/time_within_train_step": 27.799217462539673, "step": 603} +{"train_info/time_between_train_steps": 0.006463766098022461, "step": 603} +{"info/global_step": 604, "train_info/time_within_train_step": 28.004334211349487, "step": 604} +{"train_info/time_between_train_steps": 0.005919218063354492, "step": 604} +{"info/global_step": 605, "train_info/time_within_train_step": 27.786115169525146, "step": 605} +{"train_info/time_between_train_steps": 0.006108522415161133, "step": 605} +{"info/global_step": 606, "train_info/time_within_train_step": 27.84023666381836, "step": 606} +{"train_info/time_between_train_steps": 0.005667448043823242, "step": 606} +{"info/global_step": 607, "train_info/time_within_train_step": 27.752477884292603, "step": 607} +{"train_info/time_between_train_steps": 0.005544185638427734, "step": 607} +{"info/global_step": 608, "train_info/time_within_train_step": 27.857250690460205, "step": 608} +{"train_info/time_between_train_steps": 0.012639999389648438, "step": 608} +{"info/global_step": 609, "train_info/time_within_train_step": 27.76327347755432, "step": 609} +{"train_info/time_between_train_steps": 0.005695343017578125, "step": 609} +{"info/global_step": 610, "train_info/time_within_train_step": 27.878995895385742, "step": 610} +{"train_info/time_between_train_steps": 0.005777835845947266, "step": 610} +{"info/global_step": 611, "train_info/time_within_train_step": 27.751941919326782, "step": 611} +{"train_info/time_between_train_steps": 0.005476951599121094, "step": 611} +{"info/global_step": 612, "train_info/time_within_train_step": 27.803937673568726, "step": 612} +{"train_info/time_between_train_steps": 0.005715370178222656, "step": 612} +{"info/global_step": 613, "train_info/time_within_train_step": 27.857542514801025, "step": 613} +{"train_info/time_between_train_steps": 0.005567073822021484, "step": 613} +{"info/global_step": 614, "train_info/time_within_train_step": 27.786051273345947, "step": 614} +{"train_info/time_between_train_steps": 0.0056304931640625, "step": 614} +{"info/global_step": 615, "train_info/time_within_train_step": 27.856029748916626, "step": 615} +{"train_info/time_between_train_steps": 0.005684852600097656, "step": 615} +{"info/global_step": 616, "train_info/time_within_train_step": 27.83570098876953, "step": 616} +{"train_info/time_between_train_steps": 0.0055506229400634766, "step": 616} +{"info/global_step": 617, "train_info/time_within_train_step": 27.828906536102295, "step": 617} +{"train_info/time_between_train_steps": 0.0057866573333740234, "step": 617} +{"info/global_step": 618, "train_info/time_within_train_step": 27.745762586593628, "step": 618} +{"train_info/time_between_train_steps": 0.005536556243896484, "step": 618} +{"info/global_step": 619, "train_info/time_within_train_step": 27.897931575775146, "step": 619} +{"train_info/time_between_train_steps": 0.005663156509399414, "step": 619} +{"info/global_step": 620, "train_info/time_within_train_step": 27.769545555114746, "step": 620} +{"train_info/time_between_train_steps": 0.005708456039428711, "step": 620} +{"info/global_step": 621, "train_info/time_within_train_step": 27.920679569244385, "step": 621} +{"train_info/time_between_train_steps": 0.005554914474487305, "step": 621} +{"info/global_step": 622, "train_info/time_within_train_step": 27.77170181274414, "step": 622} +{"train_info/time_between_train_steps": 0.0061872005462646484, "step": 622} +{"info/global_step": 623, "train_info/time_within_train_step": 27.872432708740234, "step": 623} +{"train_info/time_between_train_steps": 0.0055828094482421875, "step": 623} +{"info/global_step": 624, "train_info/time_within_train_step": 27.775578260421753, "step": 624} +{"train_info/time_between_train_steps": 0.006834983825683594, "step": 624} +{"info/global_step": 625, "train_info/time_within_train_step": 27.826213359832764, "step": 625} +{"train_info/time_between_train_steps": 0.007031679153442383, "step": 625} +{"train_info/time_between_train_steps": 13.987961053848267, "step": 625} +{"info/global_step": 626, "train_info/time_within_train_step": 27.751076221466064, "step": 626} +{"train_info/time_between_train_steps": 0.005934238433837891, "step": 626} +{"info/global_step": 627, "train_info/time_within_train_step": 27.97437572479248, "step": 627} +{"train_info/time_between_train_steps": 0.005797624588012695, "step": 627} +{"info/global_step": 628, "train_info/time_within_train_step": 27.77997851371765, "step": 628} +{"train_info/time_between_train_steps": 0.010710000991821289, "step": 628} +{"info/global_step": 629, "train_info/time_within_train_step": 28.04901695251465, "step": 629} +{"train_info/time_between_train_steps": 0.006076812744140625, "step": 629} +{"info/global_step": 630, "train_info/time_within_train_step": 27.789891719818115, "step": 630} +{"train_info/time_between_train_steps": 0.008345603942871094, "step": 630} +{"info/global_step": 631, "train_info/time_within_train_step": 27.928303003311157, "step": 631} +{"train_info/time_between_train_steps": 0.005797147750854492, "step": 631} +{"info/global_step": 632, "train_info/time_within_train_step": 27.880406618118286, "step": 632} +{"train_info/time_between_train_steps": 0.00563812255859375, "step": 632} +{"info/global_step": 633, "train_info/time_within_train_step": 27.76958918571472, "step": 633} +{"train_info/time_between_train_steps": 0.005702972412109375, "step": 633} +{"info/global_step": 634, "train_info/time_within_train_step": 27.7690486907959, "step": 634} +{"train_info/time_between_train_steps": 0.006181955337524414, "step": 634} +{"info/global_step": 635, "train_info/time_within_train_step": 27.789633750915527, "step": 635} +{"train_info/time_between_train_steps": 0.005632162094116211, "step": 635} +{"info/global_step": 636, "train_info/time_within_train_step": 27.802901029586792, "step": 636} +{"train_info/time_between_train_steps": 0.005809307098388672, "step": 636} +{"info/global_step": 637, "train_info/time_within_train_step": 27.781079292297363, "step": 637} +{"train_info/time_between_train_steps": 0.005737781524658203, "step": 637} +{"info/global_step": 638, "train_info/time_within_train_step": 27.890536069869995, "step": 638} +{"train_info/time_between_train_steps": 0.00828695297241211, "step": 638} +{"info/global_step": 639, "train_info/time_within_train_step": 27.757305145263672, "step": 639} +{"train_info/time_between_train_steps": 0.005878448486328125, "step": 639} +{"info/global_step": 640, "train_info/time_within_train_step": 27.824612617492676, "step": 640} +{"train_info/time_between_train_steps": 0.005553722381591797, "step": 640} +{"info/global_step": 641, "train_info/time_within_train_step": 27.750709056854248, "step": 641} +{"train_info/time_between_train_steps": 0.0056760311126708984, "step": 641} +{"info/global_step": 642, "train_info/time_within_train_step": 28.081342697143555, "step": 642} +{"train_info/time_between_train_steps": 0.010579824447631836, "step": 642} +{"info/global_step": 643, "train_info/time_within_train_step": 27.938093423843384, "step": 643} +{"train_info/time_between_train_steps": 0.005594491958618164, "step": 643} +{"info/global_step": 644, "train_info/time_within_train_step": 27.726019382476807, "step": 644} +{"train_info/time_between_train_steps": 0.0056760311126708984, "step": 644} +{"info/global_step": 645, "train_info/time_within_train_step": 27.735858917236328, "step": 645} +{"train_info/time_between_train_steps": 0.005751132965087891, "step": 645} +{"info/global_step": 646, "train_info/time_within_train_step": 27.782812356948853, "step": 646} +{"train_info/time_between_train_steps": 0.0056917667388916016, "step": 646} +{"info/global_step": 647, "train_info/time_within_train_step": 27.89855933189392, "step": 647} +{"train_info/time_between_train_steps": 0.0059163570404052734, "step": 647} +{"info/global_step": 648, "train_info/time_within_train_step": 27.751590251922607, "step": 648} +{"train_info/time_between_train_steps": 0.005916595458984375, "step": 648} +{"info/global_step": 649, "train_info/time_within_train_step": 27.893304586410522, "step": 649} +{"train_info/time_between_train_steps": 0.00587153434753418, "step": 649} +{"info/global_step": 650, "train_info/time_within_train_step": 27.788249015808105, "step": 650} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736766269, "_runtime": 19429}, "step": 650} +{"logs": {"train/loss": 4.2932, "train/learning_rate": 0.00030555555555555555, "train/epoch": 25.02, "_timestamp": 1736766269, "_runtime": 19429}, "step": 650} +{"train_info/time_between_train_steps": 0.07325053215026855, "step": 650} +{"train_info/time_between_train_steps": 14.048930406570435, "step": 650} +{"info/global_step": 651, "train_info/time_within_train_step": 27.793705463409424, "step": 651} +{"train_info/time_between_train_steps": 0.006474971771240234, "step": 651} +{"info/global_step": 652, "train_info/time_within_train_step": 28.05437445640564, "step": 652} +{"train_info/time_between_train_steps": 0.0062487125396728516, "step": 652} +{"info/global_step": 653, "train_info/time_within_train_step": 27.7910258769989, "step": 653} +{"train_info/time_between_train_steps": 0.008018016815185547, "step": 653} +{"info/global_step": 654, "train_info/time_within_train_step": 27.940904140472412, "step": 654} +{"train_info/time_between_train_steps": 0.008332490921020508, "step": 654} +{"info/global_step": 655, "train_info/time_within_train_step": 27.889665603637695, "step": 655} +{"train_info/time_between_train_steps": 0.006216287612915039, "step": 655} +{"info/global_step": 656, "train_info/time_within_train_step": 27.848442316055298, "step": 656} +{"train_info/time_between_train_steps": 0.005612611770629883, "step": 656} +{"info/global_step": 657, "train_info/time_within_train_step": 27.73922371864319, "step": 657} +{"train_info/time_between_train_steps": 0.00966787338256836, "step": 657} +{"info/global_step": 658, "train_info/time_within_train_step": 27.764217376708984, "step": 658} +{"train_info/time_between_train_steps": 0.0057392120361328125, "step": 658} +{"info/global_step": 659, "train_info/time_within_train_step": 27.853891611099243, "step": 659} +{"train_info/time_between_train_steps": 0.0058307647705078125, "step": 659} +{"info/global_step": 660, "train_info/time_within_train_step": 27.756903409957886, "step": 660} +{"train_info/time_between_train_steps": 0.005812168121337891, "step": 660} +{"info/global_step": 661, "train_info/time_within_train_step": 27.866151809692383, "step": 661} +{"train_info/time_between_train_steps": 0.005666494369506836, "step": 661} +{"info/global_step": 662, "train_info/time_within_train_step": 27.817842483520508, "step": 662} +{"train_info/time_between_train_steps": 0.010584831237792969, "step": 662} +{"info/global_step": 663, "train_info/time_within_train_step": 27.92896580696106, "step": 663} +{"train_info/time_between_train_steps": 0.005910158157348633, "step": 663} +{"info/global_step": 664, "train_info/time_within_train_step": 27.79496169090271, "step": 664} +{"train_info/time_between_train_steps": 0.006045341491699219, "step": 664} +{"info/global_step": 665, "train_info/time_within_train_step": 27.75929307937622, "step": 665} +{"train_info/time_between_train_steps": 0.005606174468994141, "step": 665} +{"info/global_step": 666, "train_info/time_within_train_step": 27.80334782600403, "step": 666} +{"train_info/time_between_train_steps": 0.005910158157348633, "step": 666} +{"info/global_step": 667, "train_info/time_within_train_step": 27.806581258773804, "step": 667} +{"train_info/time_between_train_steps": 0.005636692047119141, "step": 667} +{"info/global_step": 668, "train_info/time_within_train_step": 27.875200033187866, "step": 668} +{"train_info/time_between_train_steps": 0.00591588020324707, "step": 668} +{"info/global_step": 669, "train_info/time_within_train_step": 27.80092692375183, "step": 669} +{"train_info/time_between_train_steps": 0.0070037841796875, "step": 669} +{"info/global_step": 670, "train_info/time_within_train_step": 27.816444873809814, "step": 670} +{"train_info/time_between_train_steps": 0.005890607833862305, "step": 670} +{"info/global_step": 671, "train_info/time_within_train_step": 27.802376985549927, "step": 671} +{"train_info/time_between_train_steps": 0.009394645690917969, "step": 671} +{"info/global_step": 672, "train_info/time_within_train_step": 27.90004801750183, "step": 672} +{"train_info/time_between_train_steps": 0.009356260299682617, "step": 672} +{"info/global_step": 673, "train_info/time_within_train_step": 27.829951286315918, "step": 673} +{"train_info/time_between_train_steps": 0.005987405776977539, "step": 673} +{"info/global_step": 674, "train_info/time_within_train_step": 27.823943853378296, "step": 674} +{"train_info/time_between_train_steps": 0.006229877471923828, "step": 674} +{"info/global_step": 675, "train_info/time_within_train_step": 27.790590047836304, "step": 675} +{"train_info/time_between_train_steps": 0.011559724807739258, "step": 675} +{"train_info/time_between_train_steps": 13.884723901748657, "step": 675} +{"info/global_step": 676, "train_info/time_within_train_step": 27.90453290939331, "step": 676} +{"train_info/time_between_train_steps": 0.00588226318359375, "step": 676} +{"info/global_step": 677, "train_info/time_within_train_step": 27.925422191619873, "step": 677} +{"train_info/time_between_train_steps": 0.008405208587646484, "step": 677} +{"info/global_step": 678, "train_info/time_within_train_step": 28.114108085632324, "step": 678} +{"train_info/time_between_train_steps": 0.0056552886962890625, "step": 678} +{"info/global_step": 679, "train_info/time_within_train_step": 27.96206045150757, "step": 679} +{"train_info/time_between_train_steps": 0.005942583084106445, "step": 679} +{"info/global_step": 680, "train_info/time_within_train_step": 27.75729203224182, "step": 680} +{"train_info/time_between_train_steps": 0.00572967529296875, "step": 680} +{"info/global_step": 681, "train_info/time_within_train_step": 27.822157859802246, "step": 681} +{"train_info/time_between_train_steps": 0.005483388900756836, "step": 681} +{"info/global_step": 682, "train_info/time_within_train_step": 27.830118656158447, "step": 682} +{"train_info/time_between_train_steps": 0.008438587188720703, "step": 682} +{"info/global_step": 683, "train_info/time_within_train_step": 27.802491903305054, "step": 683} +{"train_info/time_between_train_steps": 0.005516529083251953, "step": 683} +{"info/global_step": 684, "train_info/time_within_train_step": 27.806267023086548, "step": 684} +{"train_info/time_between_train_steps": 0.005821704864501953, "step": 684} +{"info/global_step": 685, "train_info/time_within_train_step": 27.80750346183777, "step": 685} +{"train_info/time_between_train_steps": 0.010397672653198242, "step": 685} +{"info/global_step": 686, "train_info/time_within_train_step": 27.815686225891113, "step": 686} +{"train_info/time_between_train_steps": 0.005605459213256836, "step": 686} +{"info/global_step": 687, "train_info/time_within_train_step": 27.762213706970215, "step": 687} +{"train_info/time_between_train_steps": 0.005652427673339844, "step": 687} +{"info/global_step": 688, "train_info/time_within_train_step": 27.791433572769165, "step": 688} +{"train_info/time_between_train_steps": 0.005501508712768555, "step": 688} +{"info/global_step": 689, "train_info/time_within_train_step": 27.79002094268799, "step": 689} +{"train_info/time_between_train_steps": 0.005594730377197266, "step": 689} +{"info/global_step": 690, "train_info/time_within_train_step": 27.81522822380066, "step": 690} +{"train_info/time_between_train_steps": 0.005621671676635742, "step": 690} +{"info/global_step": 691, "train_info/time_within_train_step": 27.78444504737854, "step": 691} +{"train_info/time_between_train_steps": 0.005755424499511719, "step": 691} +{"info/global_step": 692, "train_info/time_within_train_step": 27.77341938018799, "step": 692} +{"train_info/time_between_train_steps": 0.00559687614440918, "step": 692} +{"info/global_step": 693, "train_info/time_within_train_step": 28.02934193611145, "step": 693} +{"train_info/time_between_train_steps": 0.005845069885253906, "step": 693} +{"info/global_step": 694, "train_info/time_within_train_step": 27.813538789749146, "step": 694} +{"train_info/time_between_train_steps": 0.005800485610961914, "step": 694} +{"info/global_step": 695, "train_info/time_within_train_step": 27.774604320526123, "step": 695} +{"train_info/time_between_train_steps": 0.010819673538208008, "step": 695} +{"info/global_step": 696, "train_info/time_within_train_step": 27.75965690612793, "step": 696} +{"train_info/time_between_train_steps": 0.005785942077636719, "step": 696} +{"info/global_step": 697, "train_info/time_within_train_step": 27.84617853164673, "step": 697} +{"train_info/time_between_train_steps": 0.006319522857666016, "step": 697} +{"info/global_step": 698, "train_info/time_within_train_step": 27.775803089141846, "step": 698} +{"train_info/time_between_train_steps": 0.0058078765869140625, "step": 698} +{"info/global_step": 699, "train_info/time_within_train_step": 27.896674394607544, "step": 699} +{"train_info/time_between_train_steps": 0.006133317947387695, "step": 699} +{"info/global_step": 700, "train_info/time_within_train_step": 27.930365324020386, "step": 700} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736767691, "_runtime": 20851}, "step": 700} +{"logs": {"train/loss": 4.1681, "train/learning_rate": 0.0002777777777777778, "train/epoch": 27.02, "_timestamp": 1736767691, "_runtime": 20851}, "step": 700} +{"train_info/time_between_train_steps": 90.98682904243469, "step": 700} +{"train_info/time_between_train_steps": 104.91652989387512, "step": 700} +{"info/global_step": 701, "train_info/time_within_train_step": 27.86845588684082, "step": 701} +{"train_info/time_between_train_steps": 0.008730173110961914, "step": 701} +{"info/global_step": 702, "train_info/time_within_train_step": 28.067144870758057, "step": 702} +{"train_info/time_between_train_steps": 0.005959033966064453, "step": 702} +{"info/global_step": 703, "train_info/time_within_train_step": 27.805614471435547, "step": 703} +{"train_info/time_between_train_steps": 0.007863044738769531, "step": 703} +{"info/global_step": 704, "train_info/time_within_train_step": 28.03135919570923, "step": 704} +{"train_info/time_between_train_steps": 0.00619196891784668, "step": 704} +{"info/global_step": 705, "train_info/time_within_train_step": 27.912968635559082, "step": 705} +{"train_info/time_between_train_steps": 0.006951570510864258, "step": 705} +{"info/global_step": 706, "train_info/time_within_train_step": 27.880510568618774, "step": 706} +{"train_info/time_between_train_steps": 0.008270502090454102, "step": 706} +{"info/global_step": 707, "train_info/time_within_train_step": 27.90938901901245, "step": 707} +{"train_info/time_between_train_steps": 0.005677938461303711, "step": 707} +{"info/global_step": 708, "train_info/time_within_train_step": 27.780835390090942, "step": 708} +{"train_info/time_between_train_steps": 0.0056610107421875, "step": 708} +{"info/global_step": 709, "train_info/time_within_train_step": 27.883012771606445, "step": 709} +{"train_info/time_between_train_steps": 0.005722522735595703, "step": 709} +{"info/global_step": 710, "train_info/time_within_train_step": 27.768808364868164, "step": 710} +{"train_info/time_between_train_steps": 0.005590677261352539, "step": 710} +{"info/global_step": 711, "train_info/time_within_train_step": 27.852365732192993, "step": 711} +{"train_info/time_between_train_steps": 0.008542299270629883, "step": 711} +{"info/global_step": 712, "train_info/time_within_train_step": 27.781548500061035, "step": 712} +{"train_info/time_between_train_steps": 0.005550861358642578, "step": 712} +{"info/global_step": 713, "train_info/time_within_train_step": 27.895654439926147, "step": 713} +{"train_info/time_between_train_steps": 0.005598306655883789, "step": 713} +{"info/global_step": 714, "train_info/time_within_train_step": 27.79906964302063, "step": 714} +{"train_info/time_between_train_steps": 0.010883569717407227, "step": 714} +{"info/global_step": 715, "train_info/time_within_train_step": 27.890501499176025, "step": 715} +{"train_info/time_between_train_steps": 0.005887269973754883, "step": 715} +{"info/global_step": 716, "train_info/time_within_train_step": 27.804932832717896, "step": 716} +{"train_info/time_between_train_steps": 0.0060727596282958984, "step": 716} +{"info/global_step": 717, "train_info/time_within_train_step": 27.781813859939575, "step": 717} +{"train_info/time_between_train_steps": 0.005795955657958984, "step": 717} +{"info/global_step": 718, "train_info/time_within_train_step": 27.871602535247803, "step": 718} +{"train_info/time_between_train_steps": 0.0057370662689208984, "step": 718} +{"info/global_step": 719, "train_info/time_within_train_step": 27.77187943458557, "step": 719} +{"train_info/time_between_train_steps": 0.00575566291809082, "step": 719} +{"info/global_step": 720, "train_info/time_within_train_step": 27.854778289794922, "step": 720} +{"train_info/time_between_train_steps": 0.005828857421875, "step": 720} +{"info/global_step": 721, "train_info/time_within_train_step": 27.790355682373047, "step": 721} +{"train_info/time_between_train_steps": 0.005963325500488281, "step": 721} +{"info/global_step": 722, "train_info/time_within_train_step": 27.82884645462036, "step": 722} +{"train_info/time_between_train_steps": 0.011251449584960938, "step": 722} +{"info/global_step": 723, "train_info/time_within_train_step": 27.795560598373413, "step": 723} +{"train_info/time_between_train_steps": 0.006006002426147461, "step": 723} +{"info/global_step": 724, "train_info/time_within_train_step": 28.04048752784729, "step": 724} +{"train_info/time_between_train_steps": 0.006712198257446289, "step": 724} +{"info/global_step": 725, "train_info/time_within_train_step": 27.80498194694519, "step": 725} +{"train_info/time_between_train_steps": 0.006683826446533203, "step": 725} +{"train_info/time_between_train_steps": 13.58085012435913, "step": 725} +{"info/global_step": 726, "train_info/time_within_train_step": 27.853400230407715, "step": 726} +{"train_info/time_between_train_steps": 0.006087779998779297, "step": 726} +{"info/global_step": 727, "train_info/time_within_train_step": 27.916752576828003, "step": 727} +{"train_info/time_between_train_steps": 0.011106491088867188, "step": 727} +{"info/global_step": 728, "train_info/time_within_train_step": 27.85065770149231, "step": 728} +{"train_info/time_between_train_steps": 0.011234045028686523, "step": 728} +{"info/global_step": 729, "train_info/time_within_train_step": 27.935606718063354, "step": 729} +{"train_info/time_between_train_steps": 0.005933284759521484, "step": 729} +{"info/global_step": 730, "train_info/time_within_train_step": 27.81359553337097, "step": 730} +{"train_info/time_between_train_steps": 0.005953788757324219, "step": 730} +{"info/global_step": 731, "train_info/time_within_train_step": 27.852521657943726, "step": 731} +{"train_info/time_between_train_steps": 0.005517482757568359, "step": 731} +{"info/global_step": 732, "train_info/time_within_train_step": 27.774268865585327, "step": 732} +{"train_info/time_between_train_steps": 0.005636453628540039, "step": 732} +{"info/global_step": 733, "train_info/time_within_train_step": 27.794076681137085, "step": 733} +{"train_info/time_between_train_steps": 0.005583286285400391, "step": 733} +{"info/global_step": 734, "train_info/time_within_train_step": 27.900126457214355, "step": 734} +{"train_info/time_between_train_steps": 0.005804300308227539, "step": 734} +{"info/global_step": 735, "train_info/time_within_train_step": 27.83560299873352, "step": 735} +{"train_info/time_between_train_steps": 0.010840415954589844, "step": 735} +{"info/global_step": 736, "train_info/time_within_train_step": 27.795251607894897, "step": 736} +{"train_info/time_between_train_steps": 0.005742311477661133, "step": 736} +{"info/global_step": 737, "train_info/time_within_train_step": 27.813223838806152, "step": 737} +{"train_info/time_between_train_steps": 0.005831718444824219, "step": 737} +{"info/global_step": 738, "train_info/time_within_train_step": 27.784859657287598, "step": 738} +{"train_info/time_between_train_steps": 0.005643129348754883, "step": 738} +{"info/global_step": 739, "train_info/time_within_train_step": 27.865166664123535, "step": 739} +{"train_info/time_between_train_steps": 0.005675792694091797, "step": 739} +{"info/global_step": 740, "train_info/time_within_train_step": 27.901453971862793, "step": 740} +{"train_info/time_between_train_steps": 0.005658626556396484, "step": 740} +{"info/global_step": 741, "train_info/time_within_train_step": 27.78086495399475, "step": 741} +{"train_info/time_between_train_steps": 0.005622386932373047, "step": 741} +{"info/global_step": 742, "train_info/time_within_train_step": 27.80949115753174, "step": 742} +{"train_info/time_between_train_steps": 0.006084442138671875, "step": 742} +{"info/global_step": 743, "train_info/time_within_train_step": 27.792263507843018, "step": 743} +{"train_info/time_between_train_steps": 0.0059359073638916016, "step": 743} +{"info/global_step": 744, "train_info/time_within_train_step": 27.79115343093872, "step": 744} +{"train_info/time_between_train_steps": 0.005925178527832031, "step": 744} +{"info/global_step": 745, "train_info/time_within_train_step": 27.89913010597229, "step": 745} +{"train_info/time_between_train_steps": 0.005796909332275391, "step": 745} +{"info/global_step": 746, "train_info/time_within_train_step": 27.771217107772827, "step": 746} +{"train_info/time_between_train_steps": 0.005877256393432617, "step": 746} +{"info/global_step": 747, "train_info/time_within_train_step": 27.882332801818848, "step": 747} +{"train_info/time_between_train_steps": 0.005986452102661133, "step": 747} +{"info/global_step": 748, "train_info/time_within_train_step": 27.788058757781982, "step": 748} +{"train_info/time_between_train_steps": 0.011435508728027344, "step": 748} +{"info/global_step": 749, "train_info/time_within_train_step": 27.894991636276245, "step": 749} +{"train_info/time_between_train_steps": 0.006208896636962891, "step": 749} +{"info/global_step": 750, "train_info/time_within_train_step": 27.8125422000885, "step": 750} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736769204, "_runtime": 22364}, "step": 750} +{"logs": {"train/loss": 4.0568, "train/learning_rate": 0.00025, "train/epoch": 29.02, "_timestamp": 1736769204, "_runtime": 22364}, "step": 750} +{"train_info/time_between_train_steps": 0.05371451377868652, "step": 750} +{"train_info/time_between_train_steps": 13.7799813747406, "step": 750} +{"info/global_step": 751, "train_info/time_within_train_step": 27.942331552505493, "step": 751} +{"train_info/time_between_train_steps": 0.00594329833984375, "step": 751} +{"info/global_step": 752, "train_info/time_within_train_step": 27.95063090324402, "step": 752} +{"train_info/time_between_train_steps": 0.0059070587158203125, "step": 752} +{"info/global_step": 753, "train_info/time_within_train_step": 27.902212142944336, "step": 753} +{"train_info/time_between_train_steps": 0.006079673767089844, "step": 753} +{"info/global_step": 754, "train_info/time_within_train_step": 27.98344922065735, "step": 754} +{"train_info/time_between_train_steps": 0.005903005599975586, "step": 754} +{"info/global_step": 755, "train_info/time_within_train_step": 27.876660585403442, "step": 755} +{"train_info/time_between_train_steps": 0.006178140640258789, "step": 755} +{"info/global_step": 756, "train_info/time_within_train_step": 27.843249082565308, "step": 756} +{"train_info/time_between_train_steps": 0.005733013153076172, "step": 756} +{"info/global_step": 757, "train_info/time_within_train_step": 27.814045429229736, "step": 757} +{"train_info/time_between_train_steps": 0.005830526351928711, "step": 757} +{"info/global_step": 758, "train_info/time_within_train_step": 27.76922369003296, "step": 758} +{"train_info/time_between_train_steps": 0.005860328674316406, "step": 758} +{"info/global_step": 759, "train_info/time_within_train_step": 27.803847312927246, "step": 759} +{"train_info/time_between_train_steps": 0.005881786346435547, "step": 759} +{"info/global_step": 760, "train_info/time_within_train_step": 27.961090087890625, "step": 760} +{"train_info/time_between_train_steps": 0.005605220794677734, "step": 760} +{"info/global_step": 761, "train_info/time_within_train_step": 27.759753227233887, "step": 761} +{"train_info/time_between_train_steps": 0.010978221893310547, "step": 761} +{"info/global_step": 762, "train_info/time_within_train_step": 27.829025506973267, "step": 762} +{"train_info/time_between_train_steps": 0.005658864974975586, "step": 762} +{"info/global_step": 763, "train_info/time_within_train_step": 27.77981185913086, "step": 763} +{"train_info/time_between_train_steps": 0.005843639373779297, "step": 763} +{"info/global_step": 764, "train_info/time_within_train_step": 27.840285062789917, "step": 764} +{"train_info/time_between_train_steps": 0.005734920501708984, "step": 764} +{"info/global_step": 765, "train_info/time_within_train_step": 27.832879304885864, "step": 765} +{"train_info/time_between_train_steps": 0.005729198455810547, "step": 765} +{"info/global_step": 766, "train_info/time_within_train_step": 27.809801816940308, "step": 766} +{"train_info/time_between_train_steps": 0.010479927062988281, "step": 766} +{"info/global_step": 767, "train_info/time_within_train_step": 27.79122805595398, "step": 767} +{"train_info/time_between_train_steps": 0.005779266357421875, "step": 767} +{"info/global_step": 768, "train_info/time_within_train_step": 27.855939149856567, "step": 768} +{"train_info/time_between_train_steps": 0.005597114562988281, "step": 768} +{"info/global_step": 769, "train_info/time_within_train_step": 27.805126905441284, "step": 769} +{"train_info/time_between_train_steps": 0.005776405334472656, "step": 769} +{"info/global_step": 770, "train_info/time_within_train_step": 27.947495222091675, "step": 770} +{"train_info/time_between_train_steps": 0.008142709732055664, "step": 770} +{"info/global_step": 771, "train_info/time_within_train_step": 27.78349733352661, "step": 771} +{"train_info/time_between_train_steps": 0.00575566291809082, "step": 771} +{"info/global_step": 772, "train_info/time_within_train_step": 27.814861536026, "step": 772} +{"train_info/time_between_train_steps": 0.006094932556152344, "step": 772} +{"info/global_step": 773, "train_info/time_within_train_step": 27.805216789245605, "step": 773} +{"train_info/time_between_train_steps": 0.005964756011962891, "step": 773} +{"info/global_step": 774, "train_info/time_within_train_step": 27.85020089149475, "step": 774} +{"train_info/time_between_train_steps": 0.00603485107421875, "step": 774} +{"info/global_step": 775, "train_info/time_within_train_step": 27.847450494766235, "step": 775} +{"train_info/time_between_train_steps": 0.006819009780883789, "step": 775} +{"train_info/time_between_train_steps": 13.919832229614258, "step": 775} +{"info/global_step": 776, "train_info/time_within_train_step": 27.805989265441895, "step": 776} +{"train_info/time_between_train_steps": 0.01030421257019043, "step": 776} +{"info/global_step": 777, "train_info/time_within_train_step": 27.95451831817627, "step": 777} +{"train_info/time_between_train_steps": 0.005746126174926758, "step": 777} +{"info/global_step": 778, "train_info/time_within_train_step": 27.798892736434937, "step": 778} +{"train_info/time_between_train_steps": 0.005934715270996094, "step": 778} +{"info/global_step": 779, "train_info/time_within_train_step": 27.927175045013428, "step": 779} +{"train_info/time_between_train_steps": 0.005692243576049805, "step": 779} +{"info/global_step": 780, "train_info/time_within_train_step": 27.77482771873474, "step": 780} +{"train_info/time_between_train_steps": 0.005900382995605469, "step": 780} +{"info/global_step": 781, "train_info/time_within_train_step": 27.787017107009888, "step": 781} +{"train_info/time_between_train_steps": 0.006899356842041016, "step": 781} +{"info/global_step": 782, "train_info/time_within_train_step": 27.76770830154419, "step": 782} +{"train_info/time_between_train_steps": 0.005573749542236328, "step": 782} +{"info/global_step": 783, "train_info/time_within_train_step": 27.854334354400635, "step": 783} +{"train_info/time_between_train_steps": 0.005635261535644531, "step": 783} +{"info/global_step": 784, "train_info/time_within_train_step": 27.798616647720337, "step": 784} +{"train_info/time_between_train_steps": 0.0055811405181884766, "step": 784} +{"info/global_step": 785, "train_info/time_within_train_step": 27.87811541557312, "step": 785} +{"train_info/time_between_train_steps": 0.0056345462799072266, "step": 785} +{"info/global_step": 786, "train_info/time_within_train_step": 27.872086763381958, "step": 786} +{"train_info/time_between_train_steps": 0.009838342666625977, "step": 786} +{"info/global_step": 787, "train_info/time_within_train_step": 27.829883813858032, "step": 787} +{"train_info/time_between_train_steps": 0.005644798278808594, "step": 787} +{"info/global_step": 788, "train_info/time_within_train_step": 27.795053720474243, "step": 788} +{"train_info/time_between_train_steps": 0.0057315826416015625, "step": 788} +{"info/global_step": 789, "train_info/time_within_train_step": 27.897669315338135, "step": 789} +{"train_info/time_between_train_steps": 0.005589962005615234, "step": 789} +{"info/global_step": 790, "train_info/time_within_train_step": 27.79181146621704, "step": 790} +{"train_info/time_between_train_steps": 0.00568842887878418, "step": 790} +{"info/global_step": 791, "train_info/time_within_train_step": 27.809251308441162, "step": 791} +{"train_info/time_between_train_steps": 0.0064983367919921875, "step": 791} +{"info/global_step": 792, "train_info/time_within_train_step": 27.868955850601196, "step": 792} +{"train_info/time_between_train_steps": 0.00562739372253418, "step": 792} +{"info/global_step": 793, "train_info/time_within_train_step": 27.76931858062744, "step": 793} +{"train_info/time_between_train_steps": 0.0058078765869140625, "step": 793} +{"info/global_step": 794, "train_info/time_within_train_step": 27.822606086730957, "step": 794} +{"train_info/time_between_train_steps": 0.005629777908325195, "step": 794} +{"info/global_step": 795, "train_info/time_within_train_step": 27.774896144866943, "step": 795} +{"train_info/time_between_train_steps": 0.005739688873291016, "step": 795} +{"info/global_step": 796, "train_info/time_within_train_step": 27.948761701583862, "step": 796} +{"train_info/time_between_train_steps": 0.005820751190185547, "step": 796} +{"info/global_step": 797, "train_info/time_within_train_step": 27.81742024421692, "step": 797} +{"train_info/time_between_train_steps": 0.006616353988647461, "step": 797} +{"info/global_step": 798, "train_info/time_within_train_step": 27.87259840965271, "step": 798} +{"train_info/time_between_train_steps": 0.005661725997924805, "step": 798} +{"info/global_step": 799, "train_info/time_within_train_step": 27.7761869430542, "step": 799} +{"train_info/time_between_train_steps": 0.006192445755004883, "step": 799} +{"info/global_step": 800, "train_info/time_within_train_step": 29.645362854003906, "step": 800} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736770628, "_runtime": 23788}, "step": 800} +{"logs": {"train/loss": 3.9603, "train/learning_rate": 0.00022222222222222218, "train/epoch": 31.02, "_timestamp": 1736770628, "_runtime": 23788}, "step": 800} +{"train_info/time_between_train_steps": 112.50460743904114, "step": 800} +{"train_info/time_between_train_steps": 126.19684195518494, "step": 800} +{"info/global_step": 801, "train_info/time_within_train_step": 27.885483026504517, "step": 801} +{"train_info/time_between_train_steps": 0.01058816909790039, "step": 801} +{"info/global_step": 802, "train_info/time_within_train_step": 28.028932094573975, "step": 802} +{"train_info/time_between_train_steps": 0.005914211273193359, "step": 802} +{"info/global_step": 803, "train_info/time_within_train_step": 27.837728261947632, "step": 803} +{"train_info/time_between_train_steps": 0.014566898345947266, "step": 803} +{"info/global_step": 804, "train_info/time_within_train_step": 27.932209968566895, "step": 804} +{"train_info/time_between_train_steps": 0.005892515182495117, "step": 804} +{"info/global_step": 805, "train_info/time_within_train_step": 27.8374764919281, "step": 805} +{"train_info/time_between_train_steps": 0.010966062545776367, "step": 805} +{"info/global_step": 806, "train_info/time_within_train_step": 27.849271059036255, "step": 806} +{"train_info/time_between_train_steps": 0.00567626953125, "step": 806} +{"info/global_step": 807, "train_info/time_within_train_step": 27.780447721481323, "step": 807} +{"train_info/time_between_train_steps": 0.010695695877075195, "step": 807} +{"info/global_step": 808, "train_info/time_within_train_step": 27.790610790252686, "step": 808} +{"train_info/time_between_train_steps": 0.005665779113769531, "step": 808} +{"info/global_step": 809, "train_info/time_within_train_step": 27.803869009017944, "step": 809} +{"train_info/time_between_train_steps": 0.005592823028564453, "step": 809} +{"info/global_step": 810, "train_info/time_within_train_step": 27.907532453536987, "step": 810} +{"train_info/time_between_train_steps": 0.005724430084228516, "step": 810} +{"info/global_step": 811, "train_info/time_within_train_step": 27.80135679244995, "step": 811} +{"train_info/time_between_train_steps": 0.010661840438842773, "step": 811} +{"info/global_step": 812, "train_info/time_within_train_step": 27.825995206832886, "step": 812} +{"train_info/time_between_train_steps": 0.005589485168457031, "step": 812} +{"info/global_step": 813, "train_info/time_within_train_step": 27.83267855644226, "step": 813} +{"train_info/time_between_train_steps": 0.005575895309448242, "step": 813} +{"info/global_step": 814, "train_info/time_within_train_step": 27.80614686012268, "step": 814} +{"train_info/time_between_train_steps": 0.005463123321533203, "step": 814} +{"info/global_step": 815, "train_info/time_within_train_step": 28.43012237548828, "step": 815} +{"train_info/time_between_train_steps": 0.0055353641510009766, "step": 815} +{"info/global_step": 816, "train_info/time_within_train_step": 27.770819902420044, "step": 816} +{"train_info/time_between_train_steps": 0.005574941635131836, "step": 816} +{"info/global_step": 817, "train_info/time_within_train_step": 27.883358478546143, "step": 817} +{"train_info/time_between_train_steps": 0.0054149627685546875, "step": 817} +{"info/global_step": 818, "train_info/time_within_train_step": 27.880375146865845, "step": 818} +{"train_info/time_between_train_steps": 0.010482311248779297, "step": 818} +{"info/global_step": 819, "train_info/time_within_train_step": 27.90444302558899, "step": 819} +{"train_info/time_between_train_steps": 0.0056378841400146484, "step": 819} +{"info/global_step": 820, "train_info/time_within_train_step": 27.76259469985962, "step": 820} +{"train_info/time_between_train_steps": 0.0055141448974609375, "step": 820} +{"info/global_step": 821, "train_info/time_within_train_step": 27.76681685447693, "step": 821} +{"train_info/time_between_train_steps": 0.005781650543212891, "step": 821} +{"info/global_step": 822, "train_info/time_within_train_step": 27.763794660568237, "step": 822} +{"train_info/time_between_train_steps": 0.005678653717041016, "step": 822} +{"info/global_step": 823, "train_info/time_within_train_step": 27.851807355880737, "step": 823} +{"train_info/time_between_train_steps": 0.0056874752044677734, "step": 823} +{"info/global_step": 824, "train_info/time_within_train_step": 27.781964778900146, "step": 824} +{"train_info/time_between_train_steps": 0.005898952484130859, "step": 824} +{"info/global_step": 825, "train_info/time_within_train_step": 27.864977836608887, "step": 825} +{"train_info/time_between_train_steps": 0.006608724594116211, "step": 825} +{"train_info/time_between_train_steps": 13.757605791091919, "step": 825} +{"info/global_step": 826, "train_info/time_within_train_step": 27.753596544265747, "step": 826} +{"train_info/time_between_train_steps": 0.005531787872314453, "step": 826} +{"info/global_step": 827, "train_info/time_within_train_step": 27.90178608894348, "step": 827} +{"train_info/time_between_train_steps": 0.005498647689819336, "step": 827} +{"info/global_step": 828, "train_info/time_within_train_step": 27.76353931427002, "step": 828} +{"train_info/time_between_train_steps": 0.005952358245849609, "step": 828} +{"info/global_step": 829, "train_info/time_within_train_step": 28.006605625152588, "step": 829} +{"train_info/time_between_train_steps": 0.00571131706237793, "step": 829} +{"info/global_step": 830, "train_info/time_within_train_step": 27.752915620803833, "step": 830} +{"train_info/time_between_train_steps": 0.005849361419677734, "step": 830} +{"info/global_step": 831, "train_info/time_within_train_step": 27.83815360069275, "step": 831} +{"train_info/time_between_train_steps": 0.0055942535400390625, "step": 831} +{"info/global_step": 832, "train_info/time_within_train_step": 27.885940551757812, "step": 832} +{"train_info/time_between_train_steps": 0.00549006462097168, "step": 832} +{"info/global_step": 833, "train_info/time_within_train_step": 27.748328685760498, "step": 833} +{"train_info/time_between_train_steps": 0.010629892349243164, "step": 833} +{"info/global_step": 834, "train_info/time_within_train_step": 27.74136781692505, "step": 834} +{"train_info/time_between_train_steps": 0.005579948425292969, "step": 834} +{"info/global_step": 835, "train_info/time_within_train_step": 27.749683141708374, "step": 835} +{"train_info/time_between_train_steps": 0.0056498050689697266, "step": 835} +{"info/global_step": 836, "train_info/time_within_train_step": 27.926404237747192, "step": 836} +{"train_info/time_between_train_steps": 0.010535955429077148, "step": 836} +{"info/global_step": 837, "train_info/time_within_train_step": 27.76457381248474, "step": 837} +{"train_info/time_between_train_steps": 0.005491971969604492, "step": 837} +{"info/global_step": 838, "train_info/time_within_train_step": 27.84949493408203, "step": 838} +{"train_info/time_between_train_steps": 0.010814666748046875, "step": 838} +{"info/global_step": 839, "train_info/time_within_train_step": 27.753231048583984, "step": 839} +{"train_info/time_between_train_steps": 0.005496501922607422, "step": 839} +{"info/global_step": 840, "train_info/time_within_train_step": 27.848390102386475, "step": 840} +{"train_info/time_between_train_steps": 0.005698442459106445, "step": 840} +{"info/global_step": 841, "train_info/time_within_train_step": 27.800089597702026, "step": 841} +{"train_info/time_between_train_steps": 0.009879350662231445, "step": 841} +{"info/global_step": 842, "train_info/time_within_train_step": 27.969332695007324, "step": 842} +{"train_info/time_between_train_steps": 0.005804538726806641, "step": 842} +{"info/global_step": 843, "train_info/time_within_train_step": 27.773050546646118, "step": 843} +{"train_info/time_between_train_steps": 0.005567073822021484, "step": 843} +{"info/global_step": 844, "train_info/time_within_train_step": 27.77501106262207, "step": 844} +{"train_info/time_between_train_steps": 0.005820035934448242, "step": 844} +{"info/global_step": 845, "train_info/time_within_train_step": 27.773304224014282, "step": 845} +{"train_info/time_between_train_steps": 0.0064239501953125, "step": 845} +{"info/global_step": 846, "train_info/time_within_train_step": 27.8367600440979, "step": 846} +{"train_info/time_between_train_steps": 0.00554656982421875, "step": 846} +{"info/global_step": 847, "train_info/time_within_train_step": 28.00558590888977, "step": 847} +{"train_info/time_between_train_steps": 0.007104396820068359, "step": 847} +{"info/global_step": 848, "train_info/time_within_train_step": 27.79370927810669, "step": 848} +{"train_info/time_between_train_steps": 0.005715608596801758, "step": 848} +{"info/global_step": 849, "train_info/time_within_train_step": 36.71973133087158, "step": 849} +{"train_info/time_between_train_steps": 0.005939006805419922, "step": 849} +{"info/global_step": 850, "train_info/time_within_train_step": 27.751780033111572, "step": 850} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736772171, "_runtime": 25331}, "step": 850} +{"logs": {"train/loss": 3.8758, "train/learning_rate": 0.00019444444444444443, "train/epoch": 33.02, "_timestamp": 1736772171, "_runtime": 25331}, "step": 850} +{"train_info/time_between_train_steps": 0.15668153762817383, "step": 850} +{"train_info/time_between_train_steps": 13.968432188034058, "step": 850} +{"info/global_step": 851, "train_info/time_within_train_step": 27.74456238746643, "step": 851} +{"train_info/time_between_train_steps": 0.005504131317138672, "step": 851} +{"info/global_step": 852, "train_info/time_within_train_step": 27.945371389389038, "step": 852} +{"train_info/time_between_train_steps": 0.005705118179321289, "step": 852} +{"info/global_step": 853, "train_info/time_within_train_step": 27.800166368484497, "step": 853} +{"train_info/time_between_train_steps": 0.00575709342956543, "step": 853} +{"info/global_step": 854, "train_info/time_within_train_step": 27.902467727661133, "step": 854} +{"train_info/time_between_train_steps": 0.0057239532470703125, "step": 854} +{"info/global_step": 855, "train_info/time_within_train_step": 27.801894664764404, "step": 855} +{"train_info/time_between_train_steps": 0.005731105804443359, "step": 855} +{"info/global_step": 856, "train_info/time_within_train_step": 27.822407007217407, "step": 856} +{"train_info/time_between_train_steps": 0.005387783050537109, "step": 856} +{"info/global_step": 857, "train_info/time_within_train_step": 27.77361488342285, "step": 857} +{"train_info/time_between_train_steps": 0.0054798126220703125, "step": 857} +{"info/global_step": 858, "train_info/time_within_train_step": 27.73770546913147, "step": 858} +{"train_info/time_between_train_steps": 0.005614280700683594, "step": 858} +{"info/global_step": 859, "train_info/time_within_train_step": 27.806421279907227, "step": 859} +{"train_info/time_between_train_steps": 0.00560307502746582, "step": 859} +{"info/global_step": 860, "train_info/time_within_train_step": 27.772645235061646, "step": 860} +{"train_info/time_between_train_steps": 0.010049819946289062, "step": 860} +{"info/global_step": 861, "train_info/time_within_train_step": 27.83652091026306, "step": 861} +{"train_info/time_between_train_steps": 0.005562305450439453, "step": 861} +{"info/global_step": 862, "train_info/time_within_train_step": 27.782289743423462, "step": 862} +{"train_info/time_between_train_steps": 0.00570225715637207, "step": 862} +{"info/global_step": 863, "train_info/time_within_train_step": 27.93501114845276, "step": 863} +{"train_info/time_between_train_steps": 0.005619049072265625, "step": 863} +{"info/global_step": 864, "train_info/time_within_train_step": 27.762295246124268, "step": 864} +{"train_info/time_between_train_steps": 0.005626678466796875, "step": 864} +{"info/global_step": 865, "train_info/time_within_train_step": 27.868817806243896, "step": 865} +{"train_info/time_between_train_steps": 0.0062770843505859375, "step": 865} +{"info/global_step": 866, "train_info/time_within_train_step": 27.780558586120605, "step": 866} +{"train_info/time_between_train_steps": 0.005575656890869141, "step": 866} +{"info/global_step": 867, "train_info/time_within_train_step": 27.970682382583618, "step": 867} +{"train_info/time_between_train_steps": 0.005654811859130859, "step": 867} +{"info/global_step": 868, "train_info/time_within_train_step": 27.816131353378296, "step": 868} +{"train_info/time_between_train_steps": 0.005535125732421875, "step": 868} +{"info/global_step": 869, "train_info/time_within_train_step": 27.811738967895508, "step": 869} +{"train_info/time_between_train_steps": 0.0055866241455078125, "step": 869} +{"info/global_step": 870, "train_info/time_within_train_step": 27.815229415893555, "step": 870} +{"train_info/time_between_train_steps": 0.0065860748291015625, "step": 870} +{"info/global_step": 871, "train_info/time_within_train_step": 27.767003774642944, "step": 871} +{"train_info/time_between_train_steps": 0.00551915168762207, "step": 871} +{"info/global_step": 872, "train_info/time_within_train_step": 27.83363103866577, "step": 872} +{"train_info/time_between_train_steps": 0.0059893131256103516, "step": 872} +{"info/global_step": 873, "train_info/time_within_train_step": 27.79260492324829, "step": 873} +{"train_info/time_between_train_steps": 0.011069059371948242, "step": 873} +{"info/global_step": 874, "train_info/time_within_train_step": 27.865904092788696, "step": 874} +{"train_info/time_between_train_steps": 0.00590062141418457, "step": 874} +{"info/global_step": 875, "train_info/time_within_train_step": 27.783316135406494, "step": 875} +{"train_info/time_between_train_steps": 0.012116432189941406, "step": 875} +{"train_info/time_between_train_steps": 13.575480222702026, "step": 875} +{"info/global_step": 876, "train_info/time_within_train_step": 27.826401233673096, "step": 876} +{"train_info/time_between_train_steps": 0.0055005550384521484, "step": 876} +{"info/global_step": 877, "train_info/time_within_train_step": 27.933605194091797, "step": 877} +{"train_info/time_between_train_steps": 0.006409168243408203, "step": 877} +{"info/global_step": 878, "train_info/time_within_train_step": 27.925312042236328, "step": 878} +{"train_info/time_between_train_steps": 0.005755424499511719, "step": 878} +{"info/global_step": 879, "train_info/time_within_train_step": 27.882396936416626, "step": 879} +{"train_info/time_between_train_steps": 0.005940914154052734, "step": 879} +{"info/global_step": 880, "train_info/time_within_train_step": 27.74695658683777, "step": 880} +{"train_info/time_between_train_steps": 0.005822658538818359, "step": 880} +{"info/global_step": 881, "train_info/time_within_train_step": 27.754720449447632, "step": 881} +{"train_info/time_between_train_steps": 0.00538945198059082, "step": 881} +{"info/global_step": 882, "train_info/time_within_train_step": 27.82474374771118, "step": 882} +{"train_info/time_between_train_steps": 0.005753755569458008, "step": 882} +{"info/global_step": 883, "train_info/time_within_train_step": 27.75214719772339, "step": 883} +{"train_info/time_between_train_steps": 0.005614519119262695, "step": 883} +{"info/global_step": 884, "train_info/time_within_train_step": 27.84853744506836, "step": 884} +{"train_info/time_between_train_steps": 0.005543947219848633, "step": 884} +{"info/global_step": 885, "train_info/time_within_train_step": 27.775789737701416, "step": 885} +{"train_info/time_between_train_steps": 0.005695819854736328, "step": 885} +{"info/global_step": 886, "train_info/time_within_train_step": 27.882265090942383, "step": 886} +{"train_info/time_between_train_steps": 0.005488157272338867, "step": 886} +{"info/global_step": 887, "train_info/time_within_train_step": 27.726102113723755, "step": 887} +{"train_info/time_between_train_steps": 0.0054929256439208984, "step": 887} +{"info/global_step": 888, "train_info/time_within_train_step": 27.84920644760132, "step": 888} +{"train_info/time_between_train_steps": 0.005610466003417969, "step": 888} +{"info/global_step": 889, "train_info/time_within_train_step": 27.793251037597656, "step": 889} +{"train_info/time_between_train_steps": 0.0055065155029296875, "step": 889} +{"info/global_step": 890, "train_info/time_within_train_step": 27.723915100097656, "step": 890} +{"train_info/time_between_train_steps": 0.005628347396850586, "step": 890} +{"info/global_step": 891, "train_info/time_within_train_step": 27.872400283813477, "step": 891} +{"train_info/time_between_train_steps": 0.005526304244995117, "step": 891} +{"info/global_step": 892, "train_info/time_within_train_step": 27.741447925567627, "step": 892} +{"train_info/time_between_train_steps": 0.010373353958129883, "step": 892} +{"info/global_step": 893, "train_info/time_within_train_step": 27.832624435424805, "step": 893} +{"train_info/time_between_train_steps": 0.00561976432800293, "step": 893} +{"info/global_step": 894, "train_info/time_within_train_step": 27.854756832122803, "step": 894} +{"train_info/time_between_train_steps": 0.005562543869018555, "step": 894} +{"info/global_step": 895, "train_info/time_within_train_step": 27.835635900497437, "step": 895} +{"train_info/time_between_train_steps": 0.010565996170043945, "step": 895} +{"info/global_step": 896, "train_info/time_within_train_step": 27.768699169158936, "step": 896} +{"train_info/time_between_train_steps": 0.005843639373779297, "step": 896} +{"info/global_step": 897, "train_info/time_within_train_step": 27.85326647758484, "step": 897} +{"train_info/time_between_train_steps": 0.011347293853759766, "step": 897} +{"info/global_step": 898, "train_info/time_within_train_step": 27.74851655960083, "step": 898} +{"train_info/time_between_train_steps": 0.00835108757019043, "step": 898} +{"info/global_step": 899, "train_info/time_within_train_step": 27.915247201919556, "step": 899} +{"train_info/time_between_train_steps": 0.011203765869140625, "step": 899} +{"info/global_step": 900, "train_info/time_within_train_step": 27.890201568603516, "step": 900} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736773592, "_runtime": 26752}, "step": 900} +{"logs": {"train/loss": 3.8026, "train/learning_rate": 0.00016666666666666666, "train/epoch": 35.02, "_timestamp": 1736773592, "_runtime": 26752}, "step": 900} +{"train_info/time_between_train_steps": 49.78202986717224, "step": 900} +{"train_info/time_between_train_steps": 63.45581531524658, "step": 900} +{"info/global_step": 901, "train_info/time_within_train_step": 27.795196294784546, "step": 901} +{"train_info/time_between_train_steps": 0.005838632583618164, "step": 901} +{"info/global_step": 902, "train_info/time_within_train_step": 27.93324589729309, "step": 902} +{"train_info/time_between_train_steps": 0.0056095123291015625, "step": 902} +{"info/global_step": 903, "train_info/time_within_train_step": 27.790510416030884, "step": 903} +{"train_info/time_between_train_steps": 0.0056574344635009766, "step": 903} +{"info/global_step": 904, "train_info/time_within_train_step": 28.14746904373169, "step": 904} +{"train_info/time_between_train_steps": 0.010781049728393555, "step": 904} +{"info/global_step": 905, "train_info/time_within_train_step": 27.746877908706665, "step": 905} +{"train_info/time_between_train_steps": 0.010921001434326172, "step": 905} +{"info/global_step": 906, "train_info/time_within_train_step": 27.770426988601685, "step": 906} +{"train_info/time_between_train_steps": 0.005372524261474609, "step": 906} +{"info/global_step": 907, "train_info/time_within_train_step": 27.757555961608887, "step": 907} +{"train_info/time_between_train_steps": 0.0064525604248046875, "step": 907} +{"info/global_step": 908, "train_info/time_within_train_step": 27.814995765686035, "step": 908} +{"train_info/time_between_train_steps": 0.00543975830078125, "step": 908} +{"info/global_step": 909, "train_info/time_within_train_step": 27.812819242477417, "step": 909} +{"train_info/time_between_train_steps": 0.005606412887573242, "step": 909} +{"info/global_step": 910, "train_info/time_within_train_step": 27.826868295669556, "step": 910} +{"train_info/time_between_train_steps": 0.005387544631958008, "step": 910} +{"info/global_step": 911, "train_info/time_within_train_step": 27.729374647140503, "step": 911} +{"train_info/time_between_train_steps": 0.00546717643737793, "step": 911} +{"info/global_step": 912, "train_info/time_within_train_step": 27.814750909805298, "step": 912} +{"train_info/time_between_train_steps": 0.005609035491943359, "step": 912} +{"info/global_step": 913, "train_info/time_within_train_step": 27.717275142669678, "step": 913} +{"train_info/time_between_train_steps": 0.0054132938385009766, "step": 913} +{"info/global_step": 914, "train_info/time_within_train_step": 27.747934341430664, "step": 914} +{"train_info/time_between_train_steps": 0.0054836273193359375, "step": 914} +{"info/global_step": 915, "train_info/time_within_train_step": 27.72804856300354, "step": 915} +{"train_info/time_between_train_steps": 0.00547480583190918, "step": 915} +{"info/global_step": 916, "train_info/time_within_train_step": 27.7190899848938, "step": 916} +{"train_info/time_between_train_steps": 0.0057790279388427734, "step": 916} +{"info/global_step": 917, "train_info/time_within_train_step": 27.758115768432617, "step": 917} +{"train_info/time_between_train_steps": 0.005463838577270508, "step": 917} +{"info/global_step": 918, "train_info/time_within_train_step": 27.852283239364624, "step": 918} +{"train_info/time_between_train_steps": 0.005346536636352539, "step": 918} +{"info/global_step": 919, "train_info/time_within_train_step": 27.862552404403687, "step": 919} +{"train_info/time_between_train_steps": 0.005478382110595703, "step": 919} +{"info/global_step": 920, "train_info/time_within_train_step": 27.727519273757935, "step": 920} +{"train_info/time_between_train_steps": 0.0054416656494140625, "step": 920} +{"info/global_step": 921, "train_info/time_within_train_step": 27.80972719192505, "step": 921} +{"train_info/time_between_train_steps": 0.005502939224243164, "step": 921} +{"info/global_step": 922, "train_info/time_within_train_step": 27.87611746788025, "step": 922} +{"train_info/time_between_train_steps": 0.010627508163452148, "step": 922} +{"info/global_step": 923, "train_info/time_within_train_step": 27.817258596420288, "step": 923} +{"train_info/time_between_train_steps": 0.00586700439453125, "step": 923} +{"info/global_step": 924, "train_info/time_within_train_step": 27.84935998916626, "step": 924} +{"train_info/time_between_train_steps": 0.006011962890625, "step": 924} +{"info/global_step": 925, "train_info/time_within_train_step": 27.832958221435547, "step": 925} +{"train_info/time_between_train_steps": 0.011481761932373047, "step": 925} +{"train_info/time_between_train_steps": 13.798529863357544, "step": 925} +{"info/global_step": 926, "train_info/time_within_train_step": 27.778882026672363, "step": 926} +{"train_info/time_between_train_steps": 0.00604248046875, "step": 926} +{"info/global_step": 927, "train_info/time_within_train_step": 28.029770374298096, "step": 927} +{"train_info/time_between_train_steps": 0.005614519119262695, "step": 927} +{"info/global_step": 928, "train_info/time_within_train_step": 27.7786602973938, "step": 928} +{"train_info/time_between_train_steps": 0.011189460754394531, "step": 928} +{"info/global_step": 929, "train_info/time_within_train_step": 27.92028021812439, "step": 929} +{"train_info/time_between_train_steps": 0.0057294368743896484, "step": 929} +{"info/global_step": 930, "train_info/time_within_train_step": 27.766005992889404, "step": 930} +{"train_info/time_between_train_steps": 0.005844593048095703, "step": 930} +{"info/global_step": 931, "train_info/time_within_train_step": 27.821321487426758, "step": 931} +{"train_info/time_between_train_steps": 0.005454063415527344, "step": 931} +{"info/global_step": 932, "train_info/time_within_train_step": 27.72860813140869, "step": 932} +{"train_info/time_between_train_steps": 0.005517721176147461, "step": 932} +{"info/global_step": 933, "train_info/time_within_train_step": 27.789564609527588, "step": 933} +{"train_info/time_between_train_steps": 0.005462169647216797, "step": 933} +{"info/global_step": 934, "train_info/time_within_train_step": 27.78322720527649, "step": 934} +{"train_info/time_between_train_steps": 0.005513191223144531, "step": 934} +{"info/global_step": 935, "train_info/time_within_train_step": 27.71812629699707, "step": 935} +{"train_info/time_between_train_steps": 0.0054819583892822266, "step": 935} +{"info/global_step": 936, "train_info/time_within_train_step": 27.736724376678467, "step": 936} +{"train_info/time_between_train_steps": 0.005514621734619141, "step": 936} +{"info/global_step": 937, "train_info/time_within_train_step": 27.736259698867798, "step": 937} +{"train_info/time_between_train_steps": 0.005515098571777344, "step": 937} +{"info/global_step": 938, "train_info/time_within_train_step": 27.887134552001953, "step": 938} +{"train_info/time_between_train_steps": 0.0054666996002197266, "step": 938} +{"info/global_step": 939, "train_info/time_within_train_step": 27.750272274017334, "step": 939} +{"train_info/time_between_train_steps": 0.005465984344482422, "step": 939} +{"info/global_step": 940, "train_info/time_within_train_step": 27.979854822158813, "step": 940} +{"train_info/time_between_train_steps": 0.005405902862548828, "step": 940} +{"info/global_step": 941, "train_info/time_within_train_step": 27.76312232017517, "step": 941} +{"train_info/time_between_train_steps": 0.005475044250488281, "step": 941} +{"info/global_step": 942, "train_info/time_within_train_step": 27.839470624923706, "step": 942} +{"train_info/time_between_train_steps": 0.005396366119384766, "step": 942} +{"info/global_step": 943, "train_info/time_within_train_step": 27.76960325241089, "step": 943} +{"train_info/time_between_train_steps": 0.0055370330810546875, "step": 943} +{"info/global_step": 944, "train_info/time_within_train_step": 27.86398482322693, "step": 944} +{"train_info/time_between_train_steps": 0.00553584098815918, "step": 944} +{"info/global_step": 945, "train_info/time_within_train_step": 27.765895128250122, "step": 945} +{"train_info/time_between_train_steps": 0.010947942733764648, "step": 945} +{"info/global_step": 946, "train_info/time_within_train_step": 27.841038465499878, "step": 946} +{"train_info/time_between_train_steps": 0.005686521530151367, "step": 946} +{"info/global_step": 947, "train_info/time_within_train_step": 27.83513379096985, "step": 947} +{"train_info/time_between_train_steps": 0.006712436676025391, "step": 947} +{"info/global_step": 948, "train_info/time_within_train_step": 27.86212682723999, "step": 948} +{"train_info/time_between_train_steps": 0.00750422477722168, "step": 948} +{"info/global_step": 949, "train_info/time_within_train_step": 27.753119707107544, "step": 949} +{"train_info/time_between_train_steps": 0.0056531429290771484, "step": 949} +{"info/global_step": 950, "train_info/time_within_train_step": 27.75039267539978, "step": 950} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736775062, "_runtime": 28222}, "step": 950} +{"logs": {"train/loss": 3.7385, "train/learning_rate": 0.0001388888888888889, "train/epoch": 37.02, "_timestamp": 1736775062, "_runtime": 28222}, "step": 950} +{"train_info/time_between_train_steps": 0.06273269653320312, "step": 950} +{"train_info/time_between_train_steps": 13.56045413017273, "step": 950} +{"info/global_step": 951, "train_info/time_within_train_step": 27.735020875930786, "step": 951} +{"train_info/time_between_train_steps": 0.005300998687744141, "step": 951} +{"info/global_step": 952, "train_info/time_within_train_step": 28.011929273605347, "step": 952} +{"train_info/time_between_train_steps": 0.0052947998046875, "step": 952} +{"info/global_step": 953, "train_info/time_within_train_step": 27.77720069885254, "step": 953} +{"train_info/time_between_train_steps": 0.005753040313720703, "step": 953} +{"info/global_step": 954, "train_info/time_within_train_step": 27.91980218887329, "step": 954} +{"train_info/time_between_train_steps": 0.005670309066772461, "step": 954} +{"info/global_step": 955, "train_info/time_within_train_step": 27.921959161758423, "step": 955} +{"train_info/time_between_train_steps": 0.0057485103607177734, "step": 955} +{"info/global_step": 956, "train_info/time_within_train_step": 27.777682542800903, "step": 956} +{"train_info/time_between_train_steps": 0.005518913269042969, "step": 956} +{"info/global_step": 957, "train_info/time_within_train_step": 27.777814626693726, "step": 957} +{"train_info/time_between_train_steps": 0.005376577377319336, "step": 957} +{"info/global_step": 958, "train_info/time_within_train_step": 27.750670433044434, "step": 958} +{"train_info/time_between_train_steps": 0.005622386932373047, "step": 958} +{"info/global_step": 959, "train_info/time_within_train_step": 27.85969090461731, "step": 959} +{"train_info/time_between_train_steps": 0.0055658817291259766, "step": 959} +{"info/global_step": 960, "train_info/time_within_train_step": 27.746658086776733, "step": 960} +{"train_info/time_between_train_steps": 0.005547046661376953, "step": 960} +{"info/global_step": 961, "train_info/time_within_train_step": 27.736730813980103, "step": 961} +{"train_info/time_between_train_steps": 0.005620479583740234, "step": 961} +{"info/global_step": 962, "train_info/time_within_train_step": 27.74106740951538, "step": 962} +{"train_info/time_between_train_steps": 0.005409717559814453, "step": 962} +{"info/global_step": 963, "train_info/time_within_train_step": 27.85073733329773, "step": 963} +{"train_info/time_between_train_steps": 0.010431766510009766, "step": 963} +{"info/global_step": 964, "train_info/time_within_train_step": 27.796507596969604, "step": 964} +{"train_info/time_between_train_steps": 0.005463123321533203, "step": 964} +{"info/global_step": 965, "train_info/time_within_train_step": 27.872962713241577, "step": 965} +{"train_info/time_between_train_steps": 0.005600452423095703, "step": 965} +{"info/global_step": 966, "train_info/time_within_train_step": 27.743285179138184, "step": 966} +{"train_info/time_between_train_steps": 0.005434989929199219, "step": 966} +{"info/global_step": 967, "train_info/time_within_train_step": 27.829609870910645, "step": 967} +{"train_info/time_between_train_steps": 0.005509138107299805, "step": 967} +{"info/global_step": 968, "train_info/time_within_train_step": 27.766668558120728, "step": 968} +{"train_info/time_between_train_steps": 0.010359525680541992, "step": 968} +{"info/global_step": 969, "train_info/time_within_train_step": 27.755210876464844, "step": 969} +{"train_info/time_between_train_steps": 0.005482673645019531, "step": 969} +{"info/global_step": 970, "train_info/time_within_train_step": 27.777215719223022, "step": 970} +{"train_info/time_between_train_steps": 0.005734443664550781, "step": 970} +{"info/global_step": 971, "train_info/time_within_train_step": 27.890770435333252, "step": 971} +{"train_info/time_between_train_steps": 0.005698680877685547, "step": 971} +{"info/global_step": 972, "train_info/time_within_train_step": 27.840805768966675, "step": 972} +{"train_info/time_between_train_steps": 0.005999326705932617, "step": 972} +{"info/global_step": 973, "train_info/time_within_train_step": 27.789681434631348, "step": 973} +{"train_info/time_between_train_steps": 0.010700464248657227, "step": 973} +{"info/global_step": 974, "train_info/time_within_train_step": 27.878349781036377, "step": 974} +{"train_info/time_between_train_steps": 0.005894899368286133, "step": 974} +{"info/global_step": 975, "train_info/time_within_train_step": 27.81978416442871, "step": 975} +{"train_info/time_between_train_steps": 0.006127595901489258, "step": 975} +{"train_info/time_between_train_steps": 13.518863677978516, "step": 975} +{"info/global_step": 976, "train_info/time_within_train_step": 27.787122011184692, "step": 976} +{"train_info/time_between_train_steps": 0.0057582855224609375, "step": 976} +{"info/global_step": 977, "train_info/time_within_train_step": 27.89547061920166, "step": 977} +{"train_info/time_between_train_steps": 0.005686044692993164, "step": 977} +{"info/global_step": 978, "train_info/time_within_train_step": 27.841861724853516, "step": 978} +{"train_info/time_between_train_steps": 0.005653858184814453, "step": 978} +{"info/global_step": 979, "train_info/time_within_train_step": 27.938862800598145, "step": 979} +{"train_info/time_between_train_steps": 0.005728960037231445, "step": 979} +{"info/global_step": 980, "train_info/time_within_train_step": 27.88083553314209, "step": 980} +{"train_info/time_between_train_steps": 0.006856441497802734, "step": 980} +{"info/global_step": 981, "train_info/time_within_train_step": 27.752991437911987, "step": 981} +{"train_info/time_between_train_steps": 0.008362054824829102, "step": 981} +{"info/global_step": 982, "train_info/time_within_train_step": 27.777425289154053, "step": 982} +{"train_info/time_between_train_steps": 0.005982398986816406, "step": 982} +{"info/global_step": 983, "train_info/time_within_train_step": 27.80885648727417, "step": 983} +{"train_info/time_between_train_steps": 0.005407571792602539, "step": 983} +{"info/global_step": 984, "train_info/time_within_train_step": 27.831488370895386, "step": 984} +{"train_info/time_between_train_steps": 0.005551815032958984, "step": 984} +{"info/global_step": 985, "train_info/time_within_train_step": 27.723004817962646, "step": 985} +{"train_info/time_between_train_steps": 0.0053861141204833984, "step": 985} +{"info/global_step": 986, "train_info/time_within_train_step": 27.866868019104004, "step": 986} +{"train_info/time_between_train_steps": 0.006902933120727539, "step": 986} +{"info/global_step": 987, "train_info/time_within_train_step": 27.767178773880005, "step": 987} +{"train_info/time_between_train_steps": 0.005437612533569336, "step": 987} +{"info/global_step": 988, "train_info/time_within_train_step": 27.790215015411377, "step": 988} +{"train_info/time_between_train_steps": 0.010234355926513672, "step": 988} +{"info/global_step": 989, "train_info/time_within_train_step": 27.885985136032104, "step": 989} +{"train_info/time_between_train_steps": 0.005417585372924805, "step": 989} +{"info/global_step": 990, "train_info/time_within_train_step": 27.71319270133972, "step": 990} +{"train_info/time_between_train_steps": 0.005372762680053711, "step": 990} +{"info/global_step": 991, "train_info/time_within_train_step": 27.7821786403656, "step": 991} +{"train_info/time_between_train_steps": 0.00556492805480957, "step": 991} +{"info/global_step": 992, "train_info/time_within_train_step": 27.71256399154663, "step": 992} +{"train_info/time_between_train_steps": 0.01068568229675293, "step": 992} +{"info/global_step": 993, "train_info/time_within_train_step": 27.874019145965576, "step": 993} +{"train_info/time_between_train_steps": 0.005522489547729492, "step": 993} +{"info/global_step": 994, "train_info/time_within_train_step": 27.75732183456421, "step": 994} +{"train_info/time_between_train_steps": 0.006323814392089844, "step": 994} +{"info/global_step": 995, "train_info/time_within_train_step": 27.901568174362183, "step": 995} +{"train_info/time_between_train_steps": 0.005414724349975586, "step": 995} +{"info/global_step": 996, "train_info/time_within_train_step": 27.748281002044678, "step": 996} +{"train_info/time_between_train_steps": 0.005735158920288086, "step": 996} +{"info/global_step": 997, "train_info/time_within_train_step": 27.85825800895691, "step": 997} +{"train_info/time_between_train_steps": 0.005818843841552734, "step": 997} +{"info/global_step": 998, "train_info/time_within_train_step": 27.841057062149048, "step": 998} +{"train_info/time_between_train_steps": 0.011514425277709961, "step": 998} +{"info/global_step": 999, "train_info/time_within_train_step": 27.831197500228882, "step": 999} +{"train_info/time_between_train_steps": 0.011204004287719727, "step": 999} +{"info/global_step": 1000, "train_info/time_within_train_step": 28.075220823287964, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736776482, "_runtime": 29642}, "step": 1000} +{"logs": {"train/loss": 3.6821, "train/learning_rate": 0.00011111111111111109, "train/epoch": 39.02, "_timestamp": 1736776482, "_runtime": 29642}, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1736776488, "_runtime": 29648}, "step": 1000} +{"logs": {"eval/loss": 4.709035396575928, "eval/runtime": 5.2665, "eval/samples_per_second": 18.038, "eval/steps_per_second": 1.139, "train/epoch": 39.02, "_timestamp": 1736776488, "_runtime": 29648}, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1736776489, "_runtime": 29649}, "step": 1000} +{"logs": {"eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 4.709035396575928, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 110.94509026017886, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 5.2665, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 18.038, "train/epoch": 39.02, "_timestamp": 1736776489, "_runtime": 29649}, "step": 1000} +{"train_info/time_between_train_steps": 79.29386258125305, "step": 1000} +{"train_info/time_between_train_steps": 92.99435663223267, "step": 1000} +{"info/global_step": 1001, "train_info/time_within_train_step": 27.88115644454956, "step": 1001} +{"train_info/time_between_train_steps": 0.0059320926666259766, "step": 1001} +{"info/global_step": 1002, "train_info/time_within_train_step": 27.927451372146606, "step": 1002} +{"train_info/time_between_train_steps": 0.011047601699829102, "step": 1002} +{"info/global_step": 1003, "train_info/time_within_train_step": 27.756062984466553, "step": 1003} +{"train_info/time_between_train_steps": 0.006631135940551758, "step": 1003} +{"info/global_step": 1004, "train_info/time_within_train_step": 27.916649103164673, "step": 1004} +{"train_info/time_between_train_steps": 0.007570028305053711, "step": 1004} +{"info/global_step": 1005, "train_info/time_within_train_step": 27.77912998199463, "step": 1005} +{"train_info/time_between_train_steps": 0.006429910659790039, "step": 1005} +{"info/global_step": 1006, "train_info/time_within_train_step": 27.801849842071533, "step": 1006} +{"train_info/time_between_train_steps": 0.005457639694213867, "step": 1006} +{"info/global_step": 1007, "train_info/time_within_train_step": 27.838053464889526, "step": 1007} +{"train_info/time_between_train_steps": 0.005471944808959961, "step": 1007} +{"info/global_step": 1008, "train_info/time_within_train_step": 27.757721662521362, "step": 1008} +{"train_info/time_between_train_steps": 0.005513191223144531, "step": 1008} +{"info/global_step": 1009, "train_info/time_within_train_step": 27.81130838394165, "step": 1009} +{"train_info/time_between_train_steps": 0.005524635314941406, "step": 1009} +{"info/global_step": 1010, "train_info/time_within_train_step": 27.748375415802002, "step": 1010} +{"train_info/time_between_train_steps": 0.005368232727050781, "step": 1010} +{"info/global_step": 1011, "train_info/time_within_train_step": 27.861077547073364, "step": 1011} +{"train_info/time_between_train_steps": 0.005580425262451172, "step": 1011} +{"info/global_step": 1012, "train_info/time_within_train_step": 27.741008520126343, "step": 1012} +{"train_info/time_between_train_steps": 0.005513429641723633, "step": 1012} +{"info/global_step": 1013, "train_info/time_within_train_step": 27.762653827667236, "step": 1013} +{"train_info/time_between_train_steps": 0.00553131103515625, "step": 1013} +{"info/global_step": 1014, "train_info/time_within_train_step": 27.875653982162476, "step": 1014} +{"train_info/time_between_train_steps": 0.005478382110595703, "step": 1014} +{"info/global_step": 1015, "train_info/time_within_train_step": 27.729121446609497, "step": 1015} +{"train_info/time_between_train_steps": 0.0053942203521728516, "step": 1015} +{"info/global_step": 1016, "train_info/time_within_train_step": 27.788841724395752, "step": 1016} +{"train_info/time_between_train_steps": 0.005611896514892578, "step": 1016} +{"info/global_step": 1017, "train_info/time_within_train_step": 27.866578340530396, "step": 1017} +{"train_info/time_between_train_steps": 0.005399465560913086, "step": 1017} +{"info/global_step": 1018, "train_info/time_within_train_step": 27.887500524520874, "step": 1018} +{"train_info/time_between_train_steps": 0.0055201053619384766, "step": 1018} +{"info/global_step": 1019, "train_info/time_within_train_step": 27.775537252426147, "step": 1019} +{"train_info/time_between_train_steps": 0.005656719207763672, "step": 1019} +{"info/global_step": 1020, "train_info/time_within_train_step": 27.867093324661255, "step": 1020} +{"train_info/time_between_train_steps": 0.005665779113769531, "step": 1020} +{"info/global_step": 1021, "train_info/time_within_train_step": 27.778886079788208, "step": 1021} +{"train_info/time_between_train_steps": 0.010128259658813477, "step": 1021} +{"info/global_step": 1022, "train_info/time_within_train_step": 27.842901706695557, "step": 1022} +{"train_info/time_between_train_steps": 0.0057373046875, "step": 1022} +{"info/global_step": 1023, "train_info/time_within_train_step": 27.79090118408203, "step": 1023} +{"train_info/time_between_train_steps": 0.006609201431274414, "step": 1023} +{"info/global_step": 1024, "train_info/time_within_train_step": 27.83827042579651, "step": 1024} +{"train_info/time_between_train_steps": 0.011402368545532227, "step": 1024} +{"info/global_step": 1025, "train_info/time_within_train_step": 27.76199245452881, "step": 1025} +{"train_info/time_between_train_steps": 0.011415481567382812, "step": 1025} +{"train_info/time_between_train_steps": 13.473066806793213, "step": 1025} +{"info/global_step": 1026, "train_info/time_within_train_step": 27.980201721191406, "step": 1026} +{"train_info/time_between_train_steps": 0.005617618560791016, "step": 1026} +{"info/global_step": 1027, "train_info/time_within_train_step": 27.927303791046143, "step": 1027} +{"train_info/time_between_train_steps": 0.00557255744934082, "step": 1027} +{"info/global_step": 1028, "train_info/time_within_train_step": 27.824453830718994, "step": 1028} +{"train_info/time_between_train_steps": 0.005778074264526367, "step": 1028} +{"info/global_step": 1029, "train_info/time_within_train_step": 27.902002573013306, "step": 1029} +{"train_info/time_between_train_steps": 0.0056879520416259766, "step": 1029} +{"info/global_step": 1030, "train_info/time_within_train_step": 27.79703664779663, "step": 1030} +{"train_info/time_between_train_steps": 0.005833148956298828, "step": 1030} +{"info/global_step": 1031, "train_info/time_within_train_step": 27.83334732055664, "step": 1031} +{"train_info/time_between_train_steps": 0.0055103302001953125, "step": 1031} +{"info/global_step": 1032, "train_info/time_within_train_step": 27.889672994613647, "step": 1032} +{"train_info/time_between_train_steps": 0.005402565002441406, "step": 1032} +{"info/global_step": 1033, "train_info/time_within_train_step": 27.763370275497437, "step": 1033} +{"train_info/time_between_train_steps": 0.005517721176147461, "step": 1033} +{"info/global_step": 1034, "train_info/time_within_train_step": 27.749327421188354, "step": 1034} +{"train_info/time_between_train_steps": 0.010672807693481445, "step": 1034} +{"info/global_step": 1035, "train_info/time_within_train_step": 28.105374336242676, "step": 1035} +{"train_info/time_between_train_steps": 0.007647037506103516, "step": 1035} +{"info/global_step": 1036, "train_info/time_within_train_step": 27.779030799865723, "step": 1036} +{"train_info/time_between_train_steps": 0.01027679443359375, "step": 1036} +{"info/global_step": 1037, "train_info/time_within_train_step": 27.872706651687622, "step": 1037} +{"train_info/time_between_train_steps": 0.005431652069091797, "step": 1037} +{"info/global_step": 1038, "train_info/time_within_train_step": 27.770676851272583, "step": 1038} +{"train_info/time_between_train_steps": 0.005513191223144531, "step": 1038} +{"info/global_step": 1039, "train_info/time_within_train_step": 27.918116569519043, "step": 1039} +{"train_info/time_between_train_steps": 0.005440473556518555, "step": 1039} +{"info/global_step": 1040, "train_info/time_within_train_step": 27.775076627731323, "step": 1040} +{"train_info/time_between_train_steps": 0.008563756942749023, "step": 1040} +{"info/global_step": 1041, "train_info/time_within_train_step": 27.875652551651, "step": 1041} +{"train_info/time_between_train_steps": 0.005473136901855469, "step": 1041} +{"info/global_step": 1042, "train_info/time_within_train_step": 27.76671814918518, "step": 1042} +{"train_info/time_between_train_steps": 0.005452394485473633, "step": 1042} +{"info/global_step": 1043, "train_info/time_within_train_step": 27.923564434051514, "step": 1043} +{"train_info/time_between_train_steps": 0.00556182861328125, "step": 1043} +{"info/global_step": 1044, "train_info/time_within_train_step": 27.769164323806763, "step": 1044} +{"train_info/time_between_train_steps": 0.008773565292358398, "step": 1044} +{"info/global_step": 1045, "train_info/time_within_train_step": 27.898738384246826, "step": 1045} +{"train_info/time_between_train_steps": 0.005610227584838867, "step": 1045} +{"info/global_step": 1046, "train_info/time_within_train_step": 27.78192400932312, "step": 1046} +{"train_info/time_between_train_steps": 0.005548238754272461, "step": 1046} +{"info/global_step": 1047, "train_info/time_within_train_step": 27.79535222053528, "step": 1047} +{"train_info/time_between_train_steps": 0.0057373046875, "step": 1047} +{"info/global_step": 1048, "train_info/time_within_train_step": 28.003480672836304, "step": 1048} +{"train_info/time_between_train_steps": 0.005906343460083008, "step": 1048} +{"info/global_step": 1049, "train_info/time_within_train_step": 27.790306329727173, "step": 1049} +{"train_info/time_between_train_steps": 0.00571131706237793, "step": 1049} +{"info/global_step": 1050, "train_info/time_within_train_step": 27.93295907974243, "step": 1050} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1736777982, "_runtime": 31142}, "step": 1050} +{"logs": {"train/loss": 3.6317, "train/learning_rate": 8.333333333333333e-05, "train/epoch": 41.02, "_timestamp": 1736777982, "_runtime": 31142}, "step": 1050} +{"train_info/time_between_train_steps": 0.06904292106628418, "step": 1050} +{"train_info/time_between_train_steps": 13.498935222625732, "step": 1050} +{"info/global_step": 1051, "train_info/time_within_train_step": 27.916285514831543, "step": 1051} +{"train_info/time_between_train_steps": 0.005598545074462891, "step": 1051} +{"info/global_step": 1052, "train_info/time_within_train_step": 27.926565647125244, "step": 1052} +{"train_info/time_between_train_steps": 0.0055849552154541016, "step": 1052} +{"info/global_step": 1053, "train_info/time_within_train_step": 27.789872646331787, "step": 1053} +{"train_info/time_between_train_steps": 0.005699634552001953, "step": 1053} +{"info/global_step": 1054, "train_info/time_within_train_step": 27.888331413269043, "step": 1054} +{"train_info/time_between_train_steps": 0.005700826644897461, "step": 1054} +{"info/global_step": 1055, "train_info/time_within_train_step": 27.80591917037964, "step": 1055} +{"train_info/time_between_train_steps": 0.005786895751953125, "step": 1055} +{"info/global_step": 1056, "train_info/time_within_train_step": 27.86439347267151, "step": 1056} +{"train_info/time_between_train_steps": 0.007930517196655273, "step": 1056} +{"info/global_step": 1057, "train_info/time_within_train_step": 27.781832218170166, "step": 1057} +{"train_info/time_between_train_steps": 0.005613088607788086, "step": 1057} +{"info/global_step": 1058, "train_info/time_within_train_step": 27.774760484695435, "step": 1058} +{"train_info/time_between_train_steps": 0.01022648811340332, "step": 1058} +{"info/global_step": 1059, "train_info/time_within_train_step": 27.766380786895752, "step": 1059} +{"train_info/time_between_train_steps": 0.008392572402954102, "step": 1059} +{"info/global_step": 1060, "train_info/time_within_train_step": 27.762675285339355, "step": 1060} +{"train_info/time_between_train_steps": 0.005589962005615234, "step": 1060} +{"info/global_step": 1061, "train_info/time_within_train_step": 27.743847131729126, "step": 1061} +{"train_info/time_between_train_steps": 0.009017705917358398, "step": 1061} +{"info/global_step": 1062, "train_info/time_within_train_step": 27.864057064056396, "step": 1062} +{"train_info/time_between_train_steps": 0.010518550872802734, "step": 1062} +{"info/global_step": 1063, "train_info/time_within_train_step": 27.85637402534485, "step": 1063} +{"train_info/time_between_train_steps": 0.0055692195892333984, "step": 1063} +{"info/global_step": 1064, "train_info/time_within_train_step": 27.798851490020752, "step": 1064} +{"train_info/time_between_train_steps": 0.00556182861328125, "step": 1064} +{"info/global_step": 1065, "train_info/time_within_train_step": 27.7778480052948, "step": 1065} +{"train_info/time_between_train_steps": 0.005735874176025391, "step": 1065} +{"info/global_step": 1066, "train_info/time_within_train_step": 27.787466287612915, "step": 1066} +{"train_info/time_between_train_steps": 0.010568380355834961, "step": 1066} +{"info/global_step": 1067, "train_info/time_within_train_step": 27.79061770439148, "step": 1067} +{"train_info/time_between_train_steps": 0.00574183464050293, "step": 1067} +{"info/global_step": 1068, "train_info/time_within_train_step": 27.793501138687134, "step": 1068} +{"train_info/time_between_train_steps": 0.005633831024169922, "step": 1068} +{"info/global_step": 1069, "train_info/time_within_train_step": 27.897372722625732, "step": 1069} +{"train_info/time_between_train_steps": 0.005918979644775391, "step": 1069} +{"info/global_step": 1070, "train_info/time_within_train_step": 27.780814170837402, "step": 1070} +{"train_info/time_between_train_steps": 0.005742073059082031, "step": 1070} +{"info/global_step": 1071, "train_info/time_within_train_step": 27.920581817626953, "step": 1071} +{"train_info/time_between_train_steps": 0.00573420524597168, "step": 1071} +{"info/global_step": 1072, "train_info/time_within_train_step": 27.793231964111328, "step": 1072} +{"train_info/time_between_train_steps": 0.00607609748840332, "step": 1072} +{"info/global_step": 1073, "train_info/time_within_train_step": 27.858748197555542, "step": 1073} +{"train_info/time_between_train_steps": 0.0057506561279296875, "step": 1073} +{"info/global_step": 1074, "train_info/time_within_train_step": 27.799169063568115, "step": 1074} +{"train_info/time_between_train_steps": 0.006172895431518555, "step": 1074} +{"info/global_step": 1075, "train_info/time_within_train_step": 27.917004585266113, "step": 1075} +{"train_info/time_between_train_steps": 0.006356477737426758, "step": 1075} +{"train_info/time_between_train_steps": 13.776527643203735, "step": 1075} +{"info/global_step": 1076, "train_info/time_within_train_step": 27.78588581085205, "step": 1076} +{"train_info/time_between_train_steps": 0.005937099456787109, "step": 1076} +{"info/global_step": 1077, "train_info/time_within_train_step": 28.06867790222168, "step": 1077} +{"train_info/time_between_train_steps": 0.005988597869873047, "step": 1077} +{"info/global_step": 1078, "train_info/time_within_train_step": 27.93189311027527, "step": 1078} +{"train_info/time_between_train_steps": 0.0055124759674072266, "step": 1078} +{"info/global_step": 1079, "train_info/time_within_train_step": 27.948622703552246, "step": 1079} +{"train_info/time_between_train_steps": 0.005827665328979492, "step": 1079} +{"info/global_step": 1080, "train_info/time_within_train_step": 27.849504709243774, "step": 1080} +{"train_info/time_between_train_steps": 0.005678653717041016, "step": 1080} +{"info/global_step": 1081, "train_info/time_within_train_step": 27.963651418685913, "step": 1081} +{"train_info/time_between_train_steps": 0.0054552555084228516, "step": 1081} +{"info/global_step": 1082, "train_info/time_within_train_step": 27.752122402191162, "step": 1082} +{"train_info/time_between_train_steps": 0.005613565444946289, "step": 1082} +{"info/global_step": 1083, "train_info/time_within_train_step": 27.88603973388672, "step": 1083} +{"train_info/time_between_train_steps": 0.005365610122680664, "step": 1083} +{"info/global_step": 1084, "train_info/time_within_train_step": 27.788530588150024, "step": 1084} +{"train_info/time_between_train_steps": 0.005800008773803711, "step": 1084} +{"info/global_step": 1085, "train_info/time_within_train_step": 27.9082088470459, "step": 1085} +{"train_info/time_between_train_steps": 0.005387067794799805, "step": 1085} +{"info/global_step": 1086, "train_info/time_within_train_step": 27.781545877456665, "step": 1086} +{"train_info/time_between_train_steps": 0.010468244552612305, "step": 1086} +{"info/global_step": 1087, "train_info/time_within_train_step": 27.79625368118286, "step": 1087} +{"train_info/time_between_train_steps": 0.005556344985961914, "step": 1087} +{"info/global_step": 1088, "train_info/time_within_train_step": 27.78951859474182, "step": 1088} +{"train_info/time_between_train_steps": 0.0063173770904541016, "step": 1088} +{"info/global_step": 1089, "train_info/time_within_train_step": 27.738264083862305, "step": 1089} +{"train_info/time_between_train_steps": 0.005849599838256836, "step": 1089} +{"info/global_step": 1090, "train_info/time_within_train_step": 27.86385726928711, "step": 1090} +{"train_info/time_between_train_steps": 0.005472898483276367, "step": 1090} +{"info/global_step": 1091, "train_info/time_within_train_step": 27.749938011169434, "step": 1091} +{"train_info/time_between_train_steps": 0.005823373794555664, "step": 1091} +{"info/global_step": 1092, "train_info/time_within_train_step": 27.838953495025635, "step": 1092} +{"train_info/time_between_train_steps": 0.005341529846191406, "step": 1092} +{"info/global_step": 1093, "train_info/time_within_train_step": 27.735507249832153, "step": 1093} +{"train_info/time_between_train_steps": 0.010230302810668945, "step": 1093} +{"info/global_step": 1094, "train_info/time_within_train_step": 27.97118616104126, "step": 1094} +{"train_info/time_between_train_steps": 0.005629062652587891, "step": 1094} +{"info/global_step": 1095, "train_info/time_within_train_step": 27.765188932418823, "step": 1095} +{"train_info/time_between_train_steps": 0.00818777084350586, "step": 1095} +{"info/global_step": 1096, "train_info/time_within_train_step": 27.871927499771118, "step": 1096} +{"train_info/time_between_train_steps": 0.0058820247650146484, "step": 1096} +{"info/global_step": 1097, "train_info/time_within_train_step": 27.774186611175537, "step": 1097} +{"train_info/time_between_train_steps": 0.005779266357421875, "step": 1097} +{"info/global_step": 1098, "train_info/time_within_train_step": 27.943560123443604, "step": 1098} +{"train_info/time_between_train_steps": 0.00597691535949707, "step": 1098} +{"info/global_step": 1099, "train_info/time_within_train_step": 27.7624089717865, "step": 1099} +{"train_info/time_between_train_steps": 0.0063397884368896484, "step": 1099} +{"info/global_step": 1100, "train_info/time_within_train_step": 32.306228160858154, "step": 1100} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1736779410, "_runtime": 32570}, "step": 1100} +{"logs": {"train/loss": 3.5903, "train/learning_rate": 5.5555555555555545e-05, "train/epoch": 43.02, "_timestamp": 1736779410, "_runtime": 32570}, "step": 1100} +{"train_info/time_between_train_steps": 146.55639123916626, "step": 1100} +{"train_info/time_between_train_steps": 160.3728907108307, "step": 1100} +{"info/global_step": 1101, "train_info/time_within_train_step": 27.778016805648804, "step": 1101} +{"train_info/time_between_train_steps": 0.005912303924560547, "step": 1101} +{"info/global_step": 1102, "train_info/time_within_train_step": 27.96742081642151, "step": 1102} +{"train_info/time_between_train_steps": 0.010625839233398438, "step": 1102} +{"info/global_step": 1103, "train_info/time_within_train_step": 27.829864263534546, "step": 1103} +{"train_info/time_between_train_steps": 0.0069658756256103516, "step": 1103} +{"info/global_step": 1104, "train_info/time_within_train_step": 28.044952630996704, "step": 1104} +{"train_info/time_between_train_steps": 0.005888700485229492, "step": 1104} +{"info/global_step": 1105, "train_info/time_within_train_step": 27.823915719985962, "step": 1105} +{"train_info/time_between_train_steps": 0.0058214664459228516, "step": 1105} +{"info/global_step": 1106, "train_info/time_within_train_step": 27.84587597846985, "step": 1106} +{"train_info/time_between_train_steps": 0.0056056976318359375, "step": 1106} +{"info/global_step": 1107, "train_info/time_within_train_step": 27.82313585281372, "step": 1107} +{"train_info/time_between_train_steps": 0.007210493087768555, "step": 1107} +{"info/global_step": 1108, "train_info/time_within_train_step": 27.89193844795227, "step": 1108} +{"train_info/time_between_train_steps": 0.005644559860229492, "step": 1108} +{"info/global_step": 1109, "train_info/time_within_train_step": 27.878269910812378, "step": 1109} +{"train_info/time_between_train_steps": 0.005488872528076172, "step": 1109} +{"info/global_step": 1110, "train_info/time_within_train_step": 27.78833532333374, "step": 1110} +{"train_info/time_between_train_steps": 0.005593776702880859, "step": 1110} +{"info/global_step": 1111, "train_info/time_within_train_step": 27.923163652420044, "step": 1111} +{"train_info/time_between_train_steps": 0.0057413578033447266, "step": 1111} +{"info/global_step": 1112, "train_info/time_within_train_step": 27.786980152130127, "step": 1112} +{"train_info/time_between_train_steps": 0.005831003189086914, "step": 1112} +{"info/global_step": 1113, "train_info/time_within_train_step": 27.92686176300049, "step": 1113} +{"train_info/time_between_train_steps": 0.008581399917602539, "step": 1113} +{"info/global_step": 1114, "train_info/time_within_train_step": 27.814500331878662, "step": 1114} +{"train_info/time_between_train_steps": 0.007870674133300781, "step": 1114} +{"info/global_step": 1115, "train_info/time_within_train_step": 27.94247078895569, "step": 1115} +{"train_info/time_between_train_steps": 0.005577564239501953, "step": 1115} +{"info/global_step": 1116, "train_info/time_within_train_step": 27.813540935516357, "step": 1116} +{"train_info/time_between_train_steps": 0.00551295280456543, "step": 1116} +{"info/global_step": 1117, "train_info/time_within_train_step": 27.9060537815094, "step": 1117} +{"train_info/time_between_train_steps": 0.005593299865722656, "step": 1117} +{"info/global_step": 1118, "train_info/time_within_train_step": 27.818179845809937, "step": 1118} +{"train_info/time_between_train_steps": 0.005536317825317383, "step": 1118} +{"info/global_step": 1119, "train_info/time_within_train_step": 27.93628978729248, "step": 1119} +{"train_info/time_between_train_steps": 0.005585908889770508, "step": 1119} +{"info/global_step": 1120, "train_info/time_within_train_step": 27.836981773376465, "step": 1120} +{"train_info/time_between_train_steps": 0.005736589431762695, "step": 1120} +{"info/global_step": 1121, "train_info/time_within_train_step": 27.880160093307495, "step": 1121} +{"train_info/time_between_train_steps": 0.0055196285247802734, "step": 1121} +{"info/global_step": 1122, "train_info/time_within_train_step": 27.81790852546692, "step": 1122} +{"train_info/time_between_train_steps": 0.00584864616394043, "step": 1122} +{"info/global_step": 1123, "train_info/time_within_train_step": 27.849015712738037, "step": 1123} +{"train_info/time_between_train_steps": 0.005699634552001953, "step": 1123} +{"info/global_step": 1124, "train_info/time_within_train_step": 27.900106191635132, "step": 1124} +{"train_info/time_between_train_steps": 0.005903959274291992, "step": 1124} +{"info/global_step": 1125, "train_info/time_within_train_step": 27.934708833694458, "step": 1125} +{"train_info/time_between_train_steps": 0.011320114135742188, "step": 1125} +{"train_info/time_between_train_steps": 13.560773134231567, "step": 1125} +{"info/global_step": 1126, "train_info/time_within_train_step": 27.833829641342163, "step": 1126} +{"train_info/time_between_train_steps": 0.005989551544189453, "step": 1126} +{"info/global_step": 1127, "train_info/time_within_train_step": 27.962826013565063, "step": 1127} +{"train_info/time_between_train_steps": 0.005765438079833984, "step": 1127} +{"info/global_step": 1128, "train_info/time_within_train_step": 27.841735363006592, "step": 1128} +{"train_info/time_between_train_steps": 0.005662679672241211, "step": 1128} +{"info/global_step": 1129, "train_info/time_within_train_step": 27.965617418289185, "step": 1129} +{"train_info/time_between_train_steps": 0.010175943374633789, "step": 1129} +{"info/global_step": 1130, "train_info/time_within_train_step": 27.85622000694275, "step": 1130} +{"train_info/time_between_train_steps": 0.005673646926879883, "step": 1130} +{"info/global_step": 1131, "train_info/time_within_train_step": 27.83150053024292, "step": 1131} +{"train_info/time_between_train_steps": 0.010671377182006836, "step": 1131} +{"info/global_step": 1132, "train_info/time_within_train_step": 27.938379526138306, "step": 1132} +{"train_info/time_between_train_steps": 0.005553483963012695, "step": 1132} +{"info/global_step": 1133, "train_info/time_within_train_step": 27.85143208503723, "step": 1133} +{"train_info/time_between_train_steps": 0.015225887298583984, "step": 1133} +{"info/global_step": 1134, "train_info/time_within_train_step": 27.877789735794067, "step": 1134} +{"train_info/time_between_train_steps": 0.007933855056762695, "step": 1134} +{"info/global_step": 1135, "train_info/time_within_train_step": 27.78832721710205, "step": 1135} +{"train_info/time_between_train_steps": 0.005583763122558594, "step": 1135} +{"info/global_step": 1136, "train_info/time_within_train_step": 27.738161325454712, "step": 1136} +{"train_info/time_between_train_steps": 0.010756492614746094, "step": 1136} +{"info/global_step": 1137, "train_info/time_within_train_step": 27.786237716674805, "step": 1137} +{"train_info/time_between_train_steps": 0.007282733917236328, "step": 1137} +{"info/global_step": 1138, "train_info/time_within_train_step": 27.95731830596924, "step": 1138} +{"train_info/time_between_train_steps": 0.008994102478027344, "step": 1138} +{"info/global_step": 1139, "train_info/time_within_train_step": 27.762458086013794, "step": 1139} +{"train_info/time_between_train_steps": 0.009851694107055664, "step": 1139} +{"info/global_step": 1140, "train_info/time_within_train_step": 27.98816728591919, "step": 1140} +{"train_info/time_between_train_steps": 0.0054836273193359375, "step": 1140} +{"info/global_step": 1141, "train_info/time_within_train_step": 27.749155044555664, "step": 1141} +{"train_info/time_between_train_steps": 0.00559544563293457, "step": 1141} +{"info/global_step": 1142, "train_info/time_within_train_step": 27.856427907943726, "step": 1142} +{"train_info/time_between_train_steps": 0.0055844783782958984, "step": 1142} +{"info/global_step": 1143, "train_info/time_within_train_step": 27.812544345855713, "step": 1143} +{"train_info/time_between_train_steps": 0.005524396896362305, "step": 1143} +{"info/global_step": 1144, "train_info/time_within_train_step": 27.817763328552246, "step": 1144} +{"train_info/time_between_train_steps": 0.009152412414550781, "step": 1144} +{"info/global_step": 1145, "train_info/time_within_train_step": 27.826622009277344, "step": 1145} +{"train_info/time_between_train_steps": 0.005655765533447266, "step": 1145} +{"info/global_step": 1146, "train_info/time_within_train_step": 27.745569705963135, "step": 1146} +{"train_info/time_between_train_steps": 0.005624055862426758, "step": 1146} +{"info/global_step": 1147, "train_info/time_within_train_step": 27.822904348373413, "step": 1147} +{"train_info/time_between_train_steps": 0.005801200866699219, "step": 1147} +{"info/global_step": 1148, "train_info/time_within_train_step": 27.825608491897583, "step": 1148} +{"train_info/time_between_train_steps": 0.005883216857910156, "step": 1148} +{"info/global_step": 1149, "train_info/time_within_train_step": 27.869949102401733, "step": 1149} +{"train_info/time_between_train_steps": 0.005989789962768555, "step": 1149} +{"info/global_step": 1150, "train_info/time_within_train_step": 27.850524187088013, "step": 1150} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1736780979, "_runtime": 34139}, "step": 1150} +{"logs": {"train/loss": 3.5567, "train/learning_rate": 2.7777777777777772e-05, "train/epoch": 45.02, "_timestamp": 1736780979, "_runtime": 34139}, "step": 1150} +{"train_info/time_between_train_steps": 0.03417611122131348, "step": 1150} +{"train_info/time_between_train_steps": 13.816463947296143, "step": 1150} +{"info/global_step": 1151, "train_info/time_within_train_step": 27.889103174209595, "step": 1151} +{"train_info/time_between_train_steps": 0.0054264068603515625, "step": 1151} +{"info/global_step": 1152, "train_info/time_within_train_step": 27.9248468875885, "step": 1152} +{"train_info/time_between_train_steps": 0.005366086959838867, "step": 1152} +{"info/global_step": 1153, "train_info/time_within_train_step": 27.84695863723755, "step": 1153} +{"train_info/time_between_train_steps": 0.0059511661529541016, "step": 1153} +{"info/global_step": 1154, "train_info/time_within_train_step": 27.94793725013733, "step": 1154} +{"train_info/time_between_train_steps": 0.0056591033935546875, "step": 1154} +{"info/global_step": 1155, "train_info/time_within_train_step": 27.949291706085205, "step": 1155} +{"train_info/time_between_train_steps": 0.010520219802856445, "step": 1155} +{"info/global_step": 1156, "train_info/time_within_train_step": 27.925952196121216, "step": 1156} +{"train_info/time_between_train_steps": 0.00899648666381836, "step": 1156} +{"info/global_step": 1157, "train_info/time_within_train_step": 27.781275033950806, "step": 1157} +{"train_info/time_between_train_steps": 0.010600805282592773, "step": 1157} +{"info/global_step": 1158, "train_info/time_within_train_step": 27.80236530303955, "step": 1158} +{"train_info/time_between_train_steps": 0.005707740783691406, "step": 1158} +{"info/global_step": 1159, "train_info/time_within_train_step": 27.813570737838745, "step": 1159} +{"train_info/time_between_train_steps": 0.005612850189208984, "step": 1159} +{"info/global_step": 1160, "train_info/time_within_train_step": 27.801313400268555, "step": 1160} +{"train_info/time_between_train_steps": 0.005519866943359375, "step": 1160} +{"info/global_step": 1161, "train_info/time_within_train_step": 27.830857276916504, "step": 1161} +{"train_info/time_between_train_steps": 0.0055866241455078125, "step": 1161} +{"info/global_step": 1162, "train_info/time_within_train_step": 27.770379781723022, "step": 1162} +{"train_info/time_between_train_steps": 0.008804082870483398, "step": 1162} +{"info/global_step": 1163, "train_info/time_within_train_step": 27.892955541610718, "step": 1163} +{"train_info/time_between_train_steps": 0.010837793350219727, "step": 1163} +{"info/global_step": 1164, "train_info/time_within_train_step": 27.88989567756653, "step": 1164} +{"train_info/time_between_train_steps": 0.00549006462097168, "step": 1164} +{"info/global_step": 1165, "train_info/time_within_train_step": 27.78991198539734, "step": 1165} +{"train_info/time_between_train_steps": 0.005645036697387695, "step": 1165} +{"info/global_step": 1166, "train_info/time_within_train_step": 27.877788305282593, "step": 1166} +{"train_info/time_between_train_steps": 0.005600452423095703, "step": 1166} +{"info/global_step": 1167, "train_info/time_within_train_step": 27.840307474136353, "step": 1167} +{"train_info/time_between_train_steps": 0.0056993961334228516, "step": 1167} +{"info/global_step": 1168, "train_info/time_within_train_step": 27.7795352935791, "step": 1168} +{"train_info/time_between_train_steps": 0.008302450180053711, "step": 1168} +{"info/global_step": 1169, "train_info/time_within_train_step": 27.766916036605835, "step": 1169} +{"train_info/time_between_train_steps": 0.009710073471069336, "step": 1169} +{"info/global_step": 1170, "train_info/time_within_train_step": 27.84773349761963, "step": 1170} +{"train_info/time_between_train_steps": 0.010645151138305664, "step": 1170} +{"info/global_step": 1171, "train_info/time_within_train_step": 27.86344575881958, "step": 1171} +{"train_info/time_between_train_steps": 0.0056400299072265625, "step": 1171} +{"info/global_step": 1172, "train_info/time_within_train_step": 27.80300545692444, "step": 1172} +{"train_info/time_between_train_steps": 0.005815982818603516, "step": 1172} +{"info/global_step": 1173, "train_info/time_within_train_step": 27.827930688858032, "step": 1173} +{"train_info/time_between_train_steps": 0.00913548469543457, "step": 1173} +{"info/global_step": 1174, "train_info/time_within_train_step": 27.80354332923889, "step": 1174} +{"train_info/time_between_train_steps": 0.011055707931518555, "step": 1174} +{"info/global_step": 1175, "train_info/time_within_train_step": 27.866885662078857, "step": 1175} +{"train_info/time_between_train_steps": 0.01179361343383789, "step": 1175} +{"train_info/time_between_train_steps": 13.926899433135986, "step": 1175} +{"info/global_step": 1176, "train_info/time_within_train_step": 27.86172103881836, "step": 1176} +{"train_info/time_between_train_steps": 0.005332469940185547, "step": 1176} +{"info/global_step": 1177, "train_info/time_within_train_step": 27.946669816970825, "step": 1177} +{"train_info/time_between_train_steps": 0.005641460418701172, "step": 1177} +{"info/global_step": 1178, "train_info/time_within_train_step": 27.79171848297119, "step": 1178} +{"train_info/time_between_train_steps": 0.005915164947509766, "step": 1178} +{"info/global_step": 1179, "train_info/time_within_train_step": 27.961012601852417, "step": 1179} +{"train_info/time_between_train_steps": 0.0056171417236328125, "step": 1179} +{"info/global_step": 1180, "train_info/time_within_train_step": 27.77955389022827, "step": 1180} +{"train_info/time_between_train_steps": 0.007494926452636719, "step": 1180} +{"info/global_step": 1181, "train_info/time_within_train_step": 27.872594594955444, "step": 1181} +{"train_info/time_between_train_steps": 0.010823726654052734, "step": 1181} +{"info/global_step": 1182, "train_info/time_within_train_step": 27.79656219482422, "step": 1182} +{"train_info/time_between_train_steps": 0.005383968353271484, "step": 1182} +{"info/global_step": 1183, "train_info/time_within_train_step": 27.848153114318848, "step": 1183} +{"train_info/time_between_train_steps": 0.005588531494140625, "step": 1183} +{"info/global_step": 1184, "train_info/time_within_train_step": 27.827144145965576, "step": 1184} +{"train_info/time_between_train_steps": 0.00543665885925293, "step": 1184} +{"info/global_step": 1185, "train_info/time_within_train_step": 27.84482455253601, "step": 1185} +{"train_info/time_between_train_steps": 0.005452871322631836, "step": 1185} +{"info/global_step": 1186, "train_info/time_within_train_step": 27.946913957595825, "step": 1186} +{"train_info/time_between_train_steps": 0.005432844161987305, "step": 1186} +{"info/global_step": 1187, "train_info/time_within_train_step": 27.776020050048828, "step": 1187} +{"train_info/time_between_train_steps": 0.005477428436279297, "step": 1187} +{"info/global_step": 1188, "train_info/time_within_train_step": 27.818338632583618, "step": 1188} +{"train_info/time_between_train_steps": 0.005493640899658203, "step": 1188} +{"info/global_step": 1189, "train_info/time_within_train_step": 27.85594153404236, "step": 1189} +{"train_info/time_between_train_steps": 0.010572195053100586, "step": 1189} +{"info/global_step": 1190, "train_info/time_within_train_step": 27.867671012878418, "step": 1190} +{"train_info/time_between_train_steps": 0.0057375431060791016, "step": 1190} +{"info/global_step": 1191, "train_info/time_within_train_step": 27.891180276870728, "step": 1191} +{"train_info/time_between_train_steps": 0.0054225921630859375, "step": 1191} +{"info/global_step": 1192, "train_info/time_within_train_step": 27.765401601791382, "step": 1192} +{"train_info/time_between_train_steps": 0.007839441299438477, "step": 1192} +{"info/global_step": 1193, "train_info/time_within_train_step": 27.881076335906982, "step": 1193} +{"train_info/time_between_train_steps": 0.005518198013305664, "step": 1193} +{"info/global_step": 1194, "train_info/time_within_train_step": 27.81657075881958, "step": 1194} +{"train_info/time_between_train_steps": 0.005501270294189453, "step": 1194} +{"info/global_step": 1195, "train_info/time_within_train_step": 27.864324808120728, "step": 1195} +{"train_info/time_between_train_steps": 0.005598306655883789, "step": 1195} +{"info/global_step": 1196, "train_info/time_within_train_step": 27.83083748817444, "step": 1196} +{"train_info/time_between_train_steps": 0.01072549819946289, "step": 1196} +{"info/global_step": 1197, "train_info/time_within_train_step": 27.824391841888428, "step": 1197} +{"train_info/time_between_train_steps": 0.011285066604614258, "step": 1197} +{"info/global_step": 1198, "train_info/time_within_train_step": 27.900330781936646, "step": 1198} +{"train_info/time_between_train_steps": 0.01090693473815918, "step": 1198} +{"info/global_step": 1199, "train_info/time_within_train_step": 27.78675127029419, "step": 1199} +{"train_info/time_between_train_steps": 0.0059740543365478516, "step": 1199} +{"info/global_step": 1200, "train_info/time_within_train_step": 29.987390279769897, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1736782406, "_runtime": 35566}, "step": 1200} +{"logs": {"train/loss": 3.533, "train/learning_rate": 0.0, "train/epoch": 47.02, "_timestamp": 1736782406, "_runtime": 35566}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1736782473, "_runtime": 35633}, "step": 1200} +{"logs": {"train/train_runtime": 35634.3589, "train/train_samples_per_second": 17.242, "train/train_steps_per_second": 0.034, "train/total_flos": 3.2574232461312e+17, "train/train_loss": 4.762492023309072, "train/epoch": 47.02, "_timestamp": 1736782473, "_runtime": 35633}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1922.4453125, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1736782494, "_runtime": 35654}, "step": 1200} +{"logs": {"eval/loss": 4.733560085296631, "eval/runtime": 1.9697, "eval/samples_per_second": 48.231, "eval/steps_per_second": 3.046, "train/epoch": 47.02, "_timestamp": 1736782494, "_runtime": 35654}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1922.4453125, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1736782494, "_runtime": 35654}, "step": 1200} +{"logs": {"eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 4.733560085296631, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 113.69962303283889, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 1.9697, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 48.231, "train/epoch": 47.02, "_timestamp": 1736782494, "_runtime": 35654}, "step": 1200} diff --git a/pytorch_model.bin b/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..868001a08109980517cb632374c68f5e59faeab9 --- /dev/null +++ b/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e8b835e191ab1ccba6197d07ba28e4b9bc4661f01c6d6948073e0f35780f63e +size 510396521 diff --git a/shuffle_control_pl_PL_randinit_seed53.log b/shuffle_control_pl_PL_randinit_seed53.log new file mode 100755 index 0000000000000000000000000000000000000000..7dac276bf62858760c318a0b1c4029573c9b18dd --- /dev/null +++ b/shuffle_control_pl_PL_randinit_seed53.log @@ -0,0 +1,121 @@ +|=>> 01/13 [06:38:59] - mistral - INFO :: Starting Run: shuffle_control_pl_PL_randinit_seed53... +|=>> 01/13 [06:38:59] - mistral - INFO :: Setting Random Seed to 53! +|=>> 01/13 [06:38:59] - mistral - INFO :: Building Tokenize and Initializing `gpt2-small` via AutoModel/AutoConfig... +|=>> 01/13 [06:38:59] - mistral - INFO :: Using Configs For Model From: /scratch/xiulyang/multilingual-LM/mistral/conf/models/gpt2-small-PL.json ... +|=>> 01/13 [06:38:59] - mistral.models.auto - INFO :: Building Hugging Face GPT2Config from provided configs: {'activation_function': 'gelu_new', 'architectures': ['GPT2LMHeadModel'], 'attn_pdrop': 0.1, 'bos_token_id': 50256, 'embd_pdrop': 0.1, 'eos_token_id': 50256, 'initializer_range': 0.02, 'layer_norm_epsilon': 1e-05, 'model_type': 'gpt2', 'n_ctx': 1024, 'n_embd': 768, 'n_head': 12, 'n_inner': None, 'n_layer': 12, 'n_positions': 1024, 'reorder_and_upcast_attn': True, 'resid_pdrop': 0.1, 'scale_attn_by_inverse_layer_idx': True, 'scale_attn_weights': True, 'summary_activation': None, 'summary_first_dropout': 0.2, 'summary_proj_to_labels': True, 'summary_type': 'cls_index', 'summary_use_proj': True, 'task_specific_params': {'text-generation': {'do_sample': True, 'max_length': 1024}}, 'torch_dtype': 'float32', 'transformers_version': '4.35.2', 'use_cache': False, 'vocab_size': 50257} ... +|=>> 01/13 [06:38:59] - mistral.models.auto - INFO :: Fetching Hugging Face [Fast] AutoTokenizer for Model: `gpt2`... +|=>> 01/13 [06:38:59] - mistral.models.auto - INFO :: Using a Pretokenized Dataset +|=>> 01/13 [06:38:59] - mistral.models.auto - INFO :: Initializing Custom GPT-2 Model from Configuration: `gpt2`... +|=>> 01/13 [06:39:04] - mistral - INFO :: Setting Training Arguments from Quinfig... +|=>> 01/13 [06:39:04] - mistral.args.training - INFO :: Setting Gradient Accumulation Steps = `64` [BSZ: 512 World Size: 1 Device BSZ: 8] +|=>> 01/13 [06:39:04] - mistral - INFO :: Downloading and Preprocessing Dataset `/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py`... +|=>> 01/13 [06:39:04] - datasets_modules.datasets.multilingual_dataset.248f0017d070309f41870705126964d3791eaa3b316ed4c2bd9c05e827869435.multilingual_dataset - INFO :: Generating examples from = /scratch/xiulyang/multilingual-LM/data/multilingual/multilingual_data_perturbed/shuffle_control_pl/train +|=>> 01/13 [06:39:06] - datasets_modules.datasets.multilingual_dataset.248f0017d070309f41870705126964d3791eaa3b316ed4c2bd9c05e827869435.multilingual_dataset - INFO :: Total sentences: 1068956 +|=>> 01/13 [06:39:06] - datasets_modules.datasets.multilingual_dataset.248f0017d070309f41870705126964d3791eaa3b316ed4c2bd9c05e827869435.multilingual_dataset - INFO :: Loading pre-tokenized data +|=>> 01/13 [06:39:11] - datasets_modules.datasets.multilingual_dataset.248f0017d070309f41870705126964d3791eaa3b316ed4c2bd9c05e827869435.multilingual_dataset - INFO :: Concatenating tokenized data using EOS token +|=>> 01/13 [06:39:12] - datasets_modules.datasets.multilingual_dataset.248f0017d070309f41870705126964d3791eaa3b316ed4c2bd9c05e827869435.multilingual_dataset - INFO :: Chunking tokens into sublists of 1024 +|=>> 01/13 [06:39:12] - datasets_modules.datasets.multilingual_dataset.248f0017d070309f41870705126964d3791eaa3b316ed4c2bd9c05e827869435.multilingual_dataset - INFO :: Writing dataset as space-separated sequences of tokens +|=>> 01/13 [06:39:18] - datasets_modules.datasets.multilingual_dataset.248f0017d070309f41870705126964d3791eaa3b316ed4c2bd9c05e827869435.multilingual_dataset - INFO :: Generating examples from = /scratch/xiulyang/multilingual-LM/data/multilingual/multilingual_data_perturbed/shuffle_control_pl/dev +|=>> 01/13 [06:39:18] - datasets_modules.datasets.multilingual_dataset.248f0017d070309f41870705126964d3791eaa3b316ed4c2bd9c05e827869435.multilingual_dataset - INFO :: Total sentences: 5766 +|=>> 01/13 [06:39:18] - datasets_modules.datasets.multilingual_dataset.248f0017d070309f41870705126964d3791eaa3b316ed4c2bd9c05e827869435.multilingual_dataset - INFO :: Loading pre-tokenized data +|=>> 01/13 [06:39:18] - datasets_modules.datasets.multilingual_dataset.248f0017d070309f41870705126964d3791eaa3b316ed4c2bd9c05e827869435.multilingual_dataset - INFO :: Concatenating tokenized data using EOS token +|=>> 01/13 [06:39:18] - datasets_modules.datasets.multilingual_dataset.248f0017d070309f41870705126964d3791eaa3b316ed4c2bd9c05e827869435.multilingual_dataset - INFO :: Chunking tokens into sublists of 1024 +|=>> 01/13 [06:39:18] - datasets_modules.datasets.multilingual_dataset.248f0017d070309f41870705126964d3791eaa3b316ed4c2bd9c05e827869435.multilingual_dataset - INFO :: Writing dataset as space-separated sequences of tokens +|=>> 01/13 [06:39:18] - mistral.corpora.auto - INFO :: Building Tokenized Indexed Dataset for {dataset_id}/{dataset_name}... +|=>> 01/13 [06:39:18] - mistral.corpora.auto - INFO :: Building Indexed Dataset for train +|=>> 01/13 [06:39:49] - mistral.corpora.auto - INFO :: Building Indexed Dataset for validation +|=>> 01/13 [06:39:50] - mistral - INFO :: Initializing Model Trainer... +|=>> 01/13 [06:39:50] - mistral - INFO :: Training Arguments: TrainingArguments( +_n_gpu=1, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +bf16=False, +bf16_full_eval=False, +data_seed=53, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=IntervalStrategy.STEPS, +fp16=True, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +gradient_accumulation_steps=64, +gradient_checkpointing=False, +greater_is_better=None, +group_by_length=False, +half_precision_backend=auto, +hub_model_id=None, +hub_strategy=HubStrategy.EVERY_SAVE, +hub_token=, +ignore_data_skip=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0006, +length_column_name=length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=logs, +logging_first_step=True, +logging_nan_inf_filter=True, +logging_steps=50, +logging_strategy=IntervalStrategy.STEPS, +lr_scheduler_type=SchedulerType.LINEAR, +max_grad_norm=1.0, +max_steps=1200, +metric_for_best_model=None, +mp_parameters=, +no_cuda=False, +num_train_epochs=3.0, +optim=OptimizerNames.ADAMW_HF, +output_dir=//scratch/xiulyang/multilingual_models/shuffle_control_pl_PL_randinit/babylm_shuffle_control_pl_PL_randinit_seed53/runs/shuffle_control_pl_PL_randinit_seed53, +overwrite_output_dir=False, +past_index=-1, +per_device_eval_batch_size=16, +per_device_train_batch_size=8, +prediction_loss_only=True, +push_to_hub=False, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +remove_unused_columns=True, +report_to=[], +resume_from_checkpoint=None, +run_name=shuffle_control_pl_PL_randinit_seed53, +save_on_each_node=False, +save_steps=1000, +save_strategy=IntervalStrategy.STEPS, +save_total_limit=None, +seed=53, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=120, +weight_decay=0.1, +xpu_backend=None, +) +|=>> 01/13 [06:39:53] - mistral.core.callbacks - INFO :: Setting W&B Project: xiulin-yang-compling +|=>> 01/13 [06:40:39] - mistral - INFO :: Training... +|=>> 01/13 [06:40:39] - mistral.core.callbacks - INFO :: Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" +|=>> 01/13 [16:34:52] - mistral - INFO :: ...and that's all folks! +|=>> 01/13 [16:34:52] - mistral - INFO :: Running final evaluation... diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/training_args.bin b/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..cd0f8832699caf9cedfc54e6ee63cf2d7599438b --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:707cc1b4962335ce6a2833d97d3275a2adb3ffdc23fb0d423b7d2cbc9c573922 +size 3183