diff --git a/checkpoint-0/config.json b/checkpoint-0/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-0/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-0/pytorch_model.bin b/checkpoint-0/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..061288474e4dc90b01d25ca0d912dbf2bc2cbece --- /dev/null +++ b/checkpoint-0/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4cb7ddeb977797468ae0b2e8a977bb66b33b53cf9ce0a85051cfaa03f3f32eb +size 420912233 diff --git a/checkpoint-0/special_tokens_map.json b/checkpoint-0/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-0/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-0/tokenizer_config.json b/checkpoint-0/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-0/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-0/training_args.bin b/checkpoint-0/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..5424cc46c818b611ae506108fe123bab21b5b22b --- /dev/null +++ b/checkpoint-0/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f315e249c575e17de3e6961ca2ae12d127be6daa71a9eab261285cd82649d224 +size 3183 diff --git a/checkpoint-100/config.json b/checkpoint-100/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-100/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..c002a7f42b21a6309ad330b5d211678bd2c63fbd --- /dev/null +++ b/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0e0bff3da591a37483fbdb2c6720640af7f0ea4c999c910dba2eb453a70a204 +size 816635249 diff --git a/checkpoint-100/pytorch_model.bin b/checkpoint-100/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..cad63ca16f77e990409048f3f401180ce20bdbb0 --- /dev/null +++ b/checkpoint-100/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d3c905b92cc53437524205131a67a775f18714e2013b47cf9d71af80f762b28 +size 420912233 diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..6ac854f8fcc13f31e3b42ae02baf1c19bbbeda60 --- /dev/null +++ b/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:207530ef64ef63afe414764b2b31c49b25058123a2a475653cc795510e30185b +size 14567 diff --git a/checkpoint-100/scaler.pt b/checkpoint-100/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..45cc4a33e17645cb0ed4a911b11c77cb2e7ce7f3 --- /dev/null +++ b/checkpoint-100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13a3423b2fe42f204bc8fe2c666ff379f9fd753a0f13613064a5e71e86b519e8 +size 559 diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..595eec9094e91b3eb24c2de88c461df2c22026ab --- /dev/null +++ b/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e9b9d31d11c624d89b0c04ad496adf4b5addd3e703848d2583972c703e8da6 +size 623 diff --git a/checkpoint-100/special_tokens_map.json b/checkpoint-100/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-100/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-100/tokenizer_config.json b/checkpoint-100/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-100/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..11a3fcb1faec8d7fc4cf32241d77c30d6cfb8758 --- /dev/null +++ b/checkpoint-100/trainer_state.json @@ -0,0 +1,34 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0233333333333334, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.1072, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9379, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9342, + "step": 100 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.7044770480128e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..5424cc46c818b611ae506108fe123bab21b5b22b --- /dev/null +++ b/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f315e249c575e17de3e6961ca2ae12d127be6daa71a9eab261285cd82649d224 +size 3183 diff --git a/checkpoint-1000/config.json b/checkpoint-1000/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-1000/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-1000/optimizer.pt b/checkpoint-1000/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..6b4bff659a47fd5d8825635f4db28d6c928d6834 --- /dev/null +++ b/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cfa38bb1ba6e891e3bc944e9b8e7ee876b6c234b9b49d764ed8e600c7c55c74 +size 816635441 diff --git a/checkpoint-1000/pytorch_model.bin b/checkpoint-1000/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..9c368be79876fd432342c71d64b407382eebbcfb --- /dev/null +++ b/checkpoint-1000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c98c76b89281c7e76d491f5a38dc852e84312c1c49ca1ceb74e29312692197a4 +size 420912233 diff --git a/checkpoint-1000/rng_state.pth b/checkpoint-1000/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..cc3f5ce473f559753013b753eef431b8bbd0b7d5 --- /dev/null +++ b/checkpoint-1000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bc08780ff5de424785b55796236eefb06ce7099ddacd7d500c5f1576fab3027 +size 14567 diff --git a/checkpoint-1000/scaler.pt b/checkpoint-1000/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..b04695d3a30e4bab2b78883d9c849c25c37ef7d7 --- /dev/null +++ b/checkpoint-1000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f810fc7b695697c440d8985f6042b4ba23a9e1027604c265718b518ca29f1b2b +size 559 diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..2a616899591a36b66ac0bd1ceeb324087c181a2a --- /dev/null +++ b/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0691206f4bd9ca409d6e7104087a4e0eb05df8f8f555a400f6ecc532edba52d8 +size 623 diff --git a/checkpoint-1000/special_tokens_map.json b/checkpoint-1000/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1000/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1000/tokenizer_config.json b/checkpoint-1000/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1000/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..2ff16b8eaeb9e1360d10ace4a06dd6c74225ebc5 --- /dev/null +++ b/checkpoint-1000/trainer_state.json @@ -0,0 +1,158 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 27.023333333333333, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.1072, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9379, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9342, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3732, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0714, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9498, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.8436, + "step": 300 + }, + { + "epoch": 9.02, + "learning_rate": 0.00047222222222222224, + "loss": 3.625, + "step": 350 + }, + { + "epoch": 11.0, + "learning_rate": 0.00044444444444444436, + "loss": 3.5012, + "step": 400 + }, + { + "epoch": 12.02, + "learning_rate": 0.00041666666666666664, + "loss": 3.3399, + "step": 450 + }, + { + "epoch": 13.03, + "learning_rate": 0.00038888888888888887, + "loss": 3.2438, + "step": 500 + }, + { + "epoch": 15.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.1962, + "step": 550 + }, + { + "epoch": 16.02, + "learning_rate": 0.0003333333333333333, + "loss": 3.0903, + "step": 600 + }, + { + "epoch": 18.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.0601, + "step": 650 + }, + { + "epoch": 19.01, + "learning_rate": 0.0002777777777777778, + "loss": 2.9726, + "step": 700 + }, + { + "epoch": 20.02, + "learning_rate": 0.00025, + "loss": 2.9202, + "step": 750 + }, + { + "epoch": 22.01, + "learning_rate": 0.00022222222222222218, + "loss": 2.9065, + "step": 800 + }, + { + "epoch": 23.02, + "learning_rate": 0.00019444444444444443, + "loss": 2.8338, + "step": 850 + }, + { + "epoch": 24.03, + "learning_rate": 0.00016666666666666666, + "loss": 2.7941, + "step": 900 + }, + { + "epoch": 26.01, + "learning_rate": 0.0001388888888888889, + "loss": 2.7915, + "step": 950 + }, + { + "epoch": 27.02, + "learning_rate": 0.00011111111111111109, + "loss": 2.7268, + "step": 1000 + }, + { + "epoch": 27.02, + "eval_loss": 3.4587721824645996, + "eval_runtime": 2.4277, + "eval_samples_per_second": 54.372, + "eval_steps_per_second": 3.707, + "step": 1000 + }, + { + "epoch": 27.02, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 3.4587721824645996, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 31.777935045784314, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 2.4277, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 54.372, + "step": 1000 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.71457337212928e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..5424cc46c818b611ae506108fe123bab21b5b22b --- /dev/null +++ b/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f315e249c575e17de3e6961ca2ae12d127be6daa71a9eab261285cd82649d224 +size 3183 diff --git a/checkpoint-1100/config.json b/checkpoint-1100/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-1100/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-1100/optimizer.pt b/checkpoint-1100/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..408f56d7bc5551762add0e96fe4eb0f06ae27d3d --- /dev/null +++ b/checkpoint-1100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3052769795b4e67ca9a03ef223df4ce71777479a3489e2b1e4f85d887d302b4 +size 816635441 diff --git a/checkpoint-1100/pytorch_model.bin b/checkpoint-1100/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..134f2555e0a3bc38e909be2e3fc77a8998ea13c8 --- /dev/null +++ b/checkpoint-1100/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e88e8dcddb6f8dfc4a1c7386c56528608c898c77d1d3872a7b9f82e19bcdb40c +size 420912233 diff --git a/checkpoint-1100/rng_state.pth b/checkpoint-1100/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..ef9ab68173032d385a85d5bd70f572ffe40d14b9 --- /dev/null +++ b/checkpoint-1100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57263e67bf3cdd95a83cf82ef463fe03ed0872e6225b179f010d87e94d17b551 +size 14567 diff --git a/checkpoint-1100/scaler.pt b/checkpoint-1100/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..37bf049fbd5fd721203bf0238edc8ff67dbd8f94 --- /dev/null +++ b/checkpoint-1100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fb16c30b686aa43e110b0d33f9d46bf3127b7124542ca8dc34831233d4675a0 +size 559 diff --git a/checkpoint-1100/scheduler.pt b/checkpoint-1100/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..c158169ec0bb09620952d544fd4b4edea0cc9cf4 --- /dev/null +++ b/checkpoint-1100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f1b7713e4bb40428f29080b7d08d4a52f779ac863737861e4724292b2cf6c59 +size 623 diff --git a/checkpoint-1100/special_tokens_map.json b/checkpoint-1100/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1100/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1100/tokenizer_config.json b/checkpoint-1100/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1100/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1100/trainer_state.json b/checkpoint-1100/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..e96c02554f819687a26a0296cf22be973e7b8352 --- /dev/null +++ b/checkpoint-1100/trainer_state.json @@ -0,0 +1,170 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 30.016666666666666, + "global_step": 1100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.1072, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9379, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9342, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3732, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0714, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9498, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.8436, + "step": 300 + }, + { + "epoch": 9.02, + "learning_rate": 0.00047222222222222224, + "loss": 3.625, + "step": 350 + }, + { + "epoch": 11.0, + "learning_rate": 0.00044444444444444436, + "loss": 3.5012, + "step": 400 + }, + { + "epoch": 12.02, + "learning_rate": 0.00041666666666666664, + "loss": 3.3399, + "step": 450 + }, + { + "epoch": 13.03, + "learning_rate": 0.00038888888888888887, + "loss": 3.2438, + "step": 500 + }, + { + "epoch": 15.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.1962, + "step": 550 + }, + { + "epoch": 16.02, + "learning_rate": 0.0003333333333333333, + "loss": 3.0903, + "step": 600 + }, + { + "epoch": 18.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.0601, + "step": 650 + }, + { + "epoch": 19.01, + "learning_rate": 0.0002777777777777778, + "loss": 2.9726, + "step": 700 + }, + { + "epoch": 20.02, + "learning_rate": 0.00025, + "loss": 2.9202, + "step": 750 + }, + { + "epoch": 22.01, + "learning_rate": 0.00022222222222222218, + "loss": 2.9065, + "step": 800 + }, + { + "epoch": 23.02, + "learning_rate": 0.00019444444444444443, + "loss": 2.8338, + "step": 850 + }, + { + "epoch": 24.03, + "learning_rate": 0.00016666666666666666, + "loss": 2.7941, + "step": 900 + }, + { + "epoch": 26.01, + "learning_rate": 0.0001388888888888889, + "loss": 2.7915, + "step": 950 + }, + { + "epoch": 27.02, + "learning_rate": 0.00011111111111111109, + "loss": 2.7268, + "step": 1000 + }, + { + "epoch": 27.02, + "eval_loss": 3.4587721824645996, + "eval_runtime": 2.4277, + "eval_samples_per_second": 54.372, + "eval_steps_per_second": 3.707, + "step": 1000 + }, + { + "epoch": 27.02, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 3.4587721824645996, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 31.777935045784314, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 2.4277, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 54.372, + "step": 1000 + }, + { + "epoch": 29.0, + "learning_rate": 8.333333333333333e-05, + "loss": 2.7262, + "step": 1050 + }, + { + "epoch": 30.02, + "learning_rate": 5.5555555555555545e-05, + "loss": 2.6701, + "step": 1100 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.9864634089472e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1100/training_args.bin b/checkpoint-1100/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..5424cc46c818b611ae506108fe123bab21b5b22b --- /dev/null +++ b/checkpoint-1100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f315e249c575e17de3e6961ca2ae12d127be6daa71a9eab261285cd82649d224 +size 3183 diff --git a/checkpoint-1200/config.json b/checkpoint-1200/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-1200/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-1200/optimizer.pt b/checkpoint-1200/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..883d17b81c8e1e34439cb5c409aa42c4b84eb1ee --- /dev/null +++ b/checkpoint-1200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dfa23e00d412f804a994d257ce794babc8f50934013b77c620bbb117253873f +size 816635441 diff --git a/checkpoint-1200/pytorch_model.bin b/checkpoint-1200/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..1fdfdbf52036d73260e04570717c1a6a23982713 --- /dev/null +++ b/checkpoint-1200/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb4d6589696541b6a5d42066e70e1f0fd48588b2aa262cd7b5b9f593990c31c7 +size 420912233 diff --git a/checkpoint-1200/rng_state.pth b/checkpoint-1200/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..f0c2b1bec82c29eb84440ab41353388f8821e2e2 --- /dev/null +++ b/checkpoint-1200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9a793d005134c927444bc883ef1dcd9d1cbc7896b0c3844d99599edebfea67c +size 14567 diff --git a/checkpoint-1200/scaler.pt b/checkpoint-1200/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..8953dddccbefc4703c09dcda27d83c15add2bade --- /dev/null +++ b/checkpoint-1200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19c7277eaca0850ae3e9b6790b3d002d820169cce0671185e672c28c8ae8e056 +size 559 diff --git a/checkpoint-1200/scheduler.pt b/checkpoint-1200/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..310d39c17fc616a9c83286ed00f0f4cefba9f5df --- /dev/null +++ b/checkpoint-1200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:935a8fb09a6e9698d9894853b05e181b3f56098deaaecddde08e55f06bf000c4 +size 623 diff --git a/checkpoint-1200/special_tokens_map.json b/checkpoint-1200/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1200/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1200/tokenizer_config.json b/checkpoint-1200/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1200/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1200/trainer_state.json b/checkpoint-1200/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..4cd3ecfac3ea52c5d7d5161702ed8462fbc7f0e8 --- /dev/null +++ b/checkpoint-1200/trainer_state.json @@ -0,0 +1,182 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 33.01, + "global_step": 1200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.1072, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9379, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9342, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3732, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0714, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9498, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.8436, + "step": 300 + }, + { + "epoch": 9.02, + "learning_rate": 0.00047222222222222224, + "loss": 3.625, + "step": 350 + }, + { + "epoch": 11.0, + "learning_rate": 0.00044444444444444436, + "loss": 3.5012, + "step": 400 + }, + { + "epoch": 12.02, + "learning_rate": 0.00041666666666666664, + "loss": 3.3399, + "step": 450 + }, + { + "epoch": 13.03, + "learning_rate": 0.00038888888888888887, + "loss": 3.2438, + "step": 500 + }, + { + "epoch": 15.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.1962, + "step": 550 + }, + { + "epoch": 16.02, + "learning_rate": 0.0003333333333333333, + "loss": 3.0903, + "step": 600 + }, + { + "epoch": 18.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.0601, + "step": 650 + }, + { + "epoch": 19.01, + "learning_rate": 0.0002777777777777778, + "loss": 2.9726, + "step": 700 + }, + { + "epoch": 20.02, + "learning_rate": 0.00025, + "loss": 2.9202, + "step": 750 + }, + { + "epoch": 22.01, + "learning_rate": 0.00022222222222222218, + "loss": 2.9065, + "step": 800 + }, + { + "epoch": 23.02, + "learning_rate": 0.00019444444444444443, + "loss": 2.8338, + "step": 850 + }, + { + "epoch": 24.03, + "learning_rate": 0.00016666666666666666, + "loss": 2.7941, + "step": 900 + }, + { + "epoch": 26.01, + "learning_rate": 0.0001388888888888889, + "loss": 2.7915, + "step": 950 + }, + { + "epoch": 27.02, + "learning_rate": 0.00011111111111111109, + "loss": 2.7268, + "step": 1000 + }, + { + "epoch": 27.02, + "eval_loss": 3.4587721824645996, + "eval_runtime": 2.4277, + "eval_samples_per_second": 54.372, + "eval_steps_per_second": 3.707, + "step": 1000 + }, + { + "epoch": 27.02, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 3.4587721824645996, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 31.777935045784314, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 2.4277, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 54.372, + "step": 1000 + }, + { + "epoch": 29.0, + "learning_rate": 8.333333333333333e-05, + "loss": 2.7262, + "step": 1050 + }, + { + "epoch": 30.02, + "learning_rate": 5.5555555555555545e-05, + "loss": 2.6701, + "step": 1100 + }, + { + "epoch": 31.03, + "learning_rate": 2.7777777777777772e-05, + "loss": 2.6452, + "step": 1150 + }, + { + "epoch": 33.01, + "learning_rate": 0.0, + "loss": 2.6559, + "step": 1200 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 3.25835344576512e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1200/training_args.bin b/checkpoint-1200/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..5424cc46c818b611ae506108fe123bab21b5b22b --- /dev/null +++ b/checkpoint-1200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f315e249c575e17de3e6961ca2ae12d127be6daa71a9eab261285cd82649d224 +size 3183 diff --git a/checkpoint-200/config.json b/checkpoint-200/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-200/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-200/optimizer.pt b/checkpoint-200/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..659b04f804f9725c6fd36ce3526df179917fa40f --- /dev/null +++ b/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0d615c60241e4c79b6d1fdaa92f7c7a30b8faf593df18182968c9d04a4e0634 +size 816635249 diff --git a/checkpoint-200/pytorch_model.bin b/checkpoint-200/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..136ca4803f4b0f5c3d6d59a721d288eee2b89dfd --- /dev/null +++ b/checkpoint-200/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90f9211ad55bb39b4ee22db2d1f18967347e9d9087b08a0a6ada46c5a862aed3 +size 420912233 diff --git a/checkpoint-200/rng_state.pth b/checkpoint-200/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..fa75ae757855abcf0450efc2b896417b76a357f5 --- /dev/null +++ b/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8c7ede958dcb6bb605c0331e9f62ef81d5e3ad19dd98e39e321a4b9313a8cf1 +size 14567 diff --git a/checkpoint-200/scaler.pt b/checkpoint-200/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..365b52ebf376498237a843f6d7332e5a49b14902 --- /dev/null +++ b/checkpoint-200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb6982c29cd162f49aeb531674acf574eccd46a8f556bec596040d7c3b95200a +size 559 diff --git a/checkpoint-200/scheduler.pt b/checkpoint-200/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..36cb8da739c80a63971a62f06f781c40ac0fceb2 --- /dev/null +++ b/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a992625ada6d884e508ff9392d16738b4a4163147f8fcbf9f46be82ecae9888 +size 623 diff --git a/checkpoint-200/special_tokens_map.json b/checkpoint-200/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-200/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-200/tokenizer_config.json b/checkpoint-200/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-200/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-200/trainer_state.json b/checkpoint-200/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..72088165c2e559fa4dcd5ead54d783fca9d7f625 --- /dev/null +++ b/checkpoint-200/trainer_state.json @@ -0,0 +1,46 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.016666666666667, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.1072, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9379, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9342, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3732, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0714, + "step": 200 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 5.423377416192e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-200/training_args.bin b/checkpoint-200/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..5424cc46c818b611ae506108fe123bab21b5b22b --- /dev/null +++ b/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f315e249c575e17de3e6961ca2ae12d127be6daa71a9eab261285cd82649d224 +size 3183 diff --git a/checkpoint-300/config.json b/checkpoint-300/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-300/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-300/optimizer.pt b/checkpoint-300/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..b97bc5733f3535e2a68fc462b1d4d5b2f3a9b339 --- /dev/null +++ b/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a504ef57fbb5e729d3ba8ff00b4216ed1385a560129497d16a8ae95fe0e0177e +size 816635441 diff --git a/checkpoint-300/pytorch_model.bin b/checkpoint-300/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..5351c6945d6bf626fc8f47fc1b106372f2842397 --- /dev/null +++ b/checkpoint-300/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a66427db37def2333951f740cd1c6bb5f1f76cc632aa18adb456d712caef44c +size 420912233 diff --git a/checkpoint-300/rng_state.pth b/checkpoint-300/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..d6a06e861d5bfee492a2fe59d46d430f32a0d678 --- /dev/null +++ b/checkpoint-300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a05d607c8a26dbe263fb4196b72aedfda2ec7bb9230639424740612fd2de7e0 +size 14567 diff --git a/checkpoint-300/scaler.pt b/checkpoint-300/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..01066cf4761ea9d2f7962f5181762f7b08690b79 --- /dev/null +++ b/checkpoint-300/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0967b9f865f16344c55f5ccc3cf7d6e8e97ca61dda304e931ca6bad130f48dd1 +size 559 diff --git a/checkpoint-300/scheduler.pt b/checkpoint-300/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..681987f21e79cd4685aac32a5e6b74341e25d936 --- /dev/null +++ b/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1db899b266916f792a0898ceb27a87eaf76647f10c29cc0c13ce22f12a12efd +size 623 diff --git a/checkpoint-300/special_tokens_map.json b/checkpoint-300/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-300/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-300/tokenizer_config.json b/checkpoint-300/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-300/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-300/trainer_state.json b/checkpoint-300/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..df00aa285cacab256083b9073bd31cc9099094a9 --- /dev/null +++ b/checkpoint-300/trainer_state.json @@ -0,0 +1,58 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 8.01, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.1072, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9379, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9342, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3732, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0714, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9498, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.8436, + "step": 300 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 8.1422777843712e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-300/training_args.bin b/checkpoint-300/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..5424cc46c818b611ae506108fe123bab21b5b22b --- /dev/null +++ b/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f315e249c575e17de3e6961ca2ae12d127be6daa71a9eab261285cd82649d224 +size 3183 diff --git a/checkpoint-400/config.json b/checkpoint-400/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-400/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-400/optimizer.pt b/checkpoint-400/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..6a783b28dabe04b267d39cb0329249228f423a70 --- /dev/null +++ b/checkpoint-400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb17e71724fc08ee245390cfb40c846791ab2b16f81123f1c9a1de951a7b89a1 +size 816635441 diff --git a/checkpoint-400/pytorch_model.bin b/checkpoint-400/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..969e5710e2bb570b6dd6f9010ad76abee60b4da4 --- /dev/null +++ b/checkpoint-400/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13909e62056a9f988336221bb474705b68853cefaced01802587c77606d265a5 +size 420912233 diff --git a/checkpoint-400/rng_state.pth b/checkpoint-400/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..54ea200a1eb40fcbcd4fd93aa81cd9b3a8cbce3f --- /dev/null +++ b/checkpoint-400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:912b9cde693e4e95486ec19f0d03eba126c2d70326c71a24ed1600df773d1ac4 +size 14567 diff --git a/checkpoint-400/scaler.pt b/checkpoint-400/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..9c7aef4199e98d81152810b661dbaffc01963383 --- /dev/null +++ b/checkpoint-400/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:476e510c8ea7edbd2b51d1e76a4e037820a5639381c0d8b5d32dafa492795a1e +size 559 diff --git a/checkpoint-400/scheduler.pt b/checkpoint-400/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..967673cfc91836d239beb5a7ede06992232ba309 --- /dev/null +++ b/checkpoint-400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db087d678047b5c346bbb8511936612c1fdf223c6fd70321e97369bc31ed76a8 +size 623 diff --git a/checkpoint-400/special_tokens_map.json b/checkpoint-400/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-400/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-400/tokenizer_config.json b/checkpoint-400/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-400/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-400/trainer_state.json b/checkpoint-400/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..8927f19a87f5a5dc140d83a2eaa25013ac05e0bd --- /dev/null +++ b/checkpoint-400/trainer_state.json @@ -0,0 +1,70 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 11.003333333333334, + "global_step": 400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.1072, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9379, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9342, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3732, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0714, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9498, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.8436, + "step": 300 + }, + { + "epoch": 9.02, + "learning_rate": 0.00047222222222222224, + "loss": 3.625, + "step": 350 + }, + { + "epoch": 11.0, + "learning_rate": 0.00044444444444444436, + "loss": 3.5012, + "step": 400 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.08611781525504e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-400/training_args.bin b/checkpoint-400/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..5424cc46c818b611ae506108fe123bab21b5b22b --- /dev/null +++ b/checkpoint-400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f315e249c575e17de3e6961ca2ae12d127be6daa71a9eab261285cd82649d224 +size 3183 diff --git a/checkpoint-500/config.json b/checkpoint-500/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-500/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-500/optimizer.pt b/checkpoint-500/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..00d291fd1de0c0f00f01d24742f87a383736a10d --- /dev/null +++ b/checkpoint-500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d73b2055e508edb64a0e4f4425b04b4bf79fcd99fb5fb82bb8f5b6776c19d0c +size 816635441 diff --git a/checkpoint-500/pytorch_model.bin b/checkpoint-500/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..1fab1b73cf772469f435d3143adc3b2f2afcd634 --- /dev/null +++ b/checkpoint-500/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7f076d00fd0c8c4fed126cf2aa64359ea0c2beffb35a594b5ac65b85c10c477 +size 420912233 diff --git a/checkpoint-500/rng_state.pth b/checkpoint-500/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..f075076d72efc5c03135043451470d48f66c4b08 --- /dev/null +++ b/checkpoint-500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:583229c0f2b0b96c90406fa43234c180feee4a9bb0d0726668c61cd19bc85596 +size 14567 diff --git a/checkpoint-500/scaler.pt b/checkpoint-500/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..e8b96c9c2837f2c95b1d07d4fc3f245f9ad1ef62 --- /dev/null +++ b/checkpoint-500/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fa4c7be44c959599b8b43bb9bc3371e9e4e5bbc5758b3ab5afcccfda3e72e67 +size 559 diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..77b958181a5b47b022b96b38a6207f274a1b6604 --- /dev/null +++ b/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:026fae4a90d56c24de94b10dfa7a75b6ba4e43bd5c1a3fdb2d77356b81cd6f8a +size 623 diff --git a/checkpoint-500/special_tokens_map.json b/checkpoint-500/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-500/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-500/tokenizer_config.json b/checkpoint-500/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-500/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..bc49c0d293ec0fd5feb03dbaa3b21bbb21424155 --- /dev/null +++ b/checkpoint-500/trainer_state.json @@ -0,0 +1,82 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 13.026666666666667, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.1072, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9379, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9342, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3732, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0714, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9498, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.8436, + "step": 300 + }, + { + "epoch": 9.02, + "learning_rate": 0.00047222222222222224, + "loss": 3.625, + "step": 350 + }, + { + "epoch": 11.0, + "learning_rate": 0.00044444444444444436, + "loss": 3.5012, + "step": 400 + }, + { + "epoch": 12.02, + "learning_rate": 0.00041666666666666664, + "loss": 3.3399, + "step": 450 + }, + { + "epoch": 13.03, + "learning_rate": 0.00038888888888888887, + "loss": 3.2438, + "step": 500 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.35656552005632e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..5424cc46c818b611ae506108fe123bab21b5b22b --- /dev/null +++ b/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f315e249c575e17de3e6961ca2ae12d127be6daa71a9eab261285cd82649d224 +size 3183 diff --git a/checkpoint-600/config.json b/checkpoint-600/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-600/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-600/optimizer.pt b/checkpoint-600/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..270b385d411e9b8d662939a14f605fa64a381e47 --- /dev/null +++ b/checkpoint-600/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3acce2a3efc11d710267651ffb65cadbbbe2799ad2db9e3600b50781e05c19b2 +size 816635441 diff --git a/checkpoint-600/pytorch_model.bin b/checkpoint-600/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..20d171abc2b6819521b41b303e2b814cf324dc6f --- /dev/null +++ b/checkpoint-600/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ced05032a10c736b271d9f8d6d0a499aa1b31d16c81f56ab9fa421f9bcc3dc4 +size 420912233 diff --git a/checkpoint-600/rng_state.pth b/checkpoint-600/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..d1e97c72aa71884fbbdc45f068ebe389e2819e45 --- /dev/null +++ b/checkpoint-600/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77745843d35c0409927e5eceb12a91b398ca867c4f0212b5651f7fefb8413a9a +size 14567 diff --git a/checkpoint-600/scaler.pt b/checkpoint-600/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..f95b3e36da01561ec333a83ee8419ad225633e06 --- /dev/null +++ b/checkpoint-600/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e8415b86bbce347c0df306b84a695add049c2a3b2d0b6f4dda3bf036d341150 +size 559 diff --git a/checkpoint-600/scheduler.pt b/checkpoint-600/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..62e5359badd619e7dad2c51d98fa8043d9948f0b --- /dev/null +++ b/checkpoint-600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:700940432b1c2117248896e2ce5a58d93c051d92ea97707f74d76bf1ef24deee +size 623 diff --git a/checkpoint-600/special_tokens_map.json b/checkpoint-600/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-600/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-600/tokenizer_config.json b/checkpoint-600/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-600/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-600/trainer_state.json b/checkpoint-600/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..e4b13a55d8e5c9b2b3522af7fbd6c74b105e9ed2 --- /dev/null +++ b/checkpoint-600/trainer_state.json @@ -0,0 +1,94 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 16.02, + "global_step": 600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.1072, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9379, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9342, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3732, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0714, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9498, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.8436, + "step": 300 + }, + { + "epoch": 9.02, + "learning_rate": 0.00047222222222222224, + "loss": 3.625, + "step": 350 + }, + { + "epoch": 11.0, + "learning_rate": 0.00044444444444444436, + "loss": 3.5012, + "step": 400 + }, + { + "epoch": 12.02, + "learning_rate": 0.00041666666666666664, + "loss": 3.3399, + "step": 450 + }, + { + "epoch": 13.03, + "learning_rate": 0.00038888888888888887, + "loss": 3.2438, + "step": 500 + }, + { + "epoch": 15.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.1962, + "step": 550 + }, + { + "epoch": 16.02, + "learning_rate": 0.0003333333333333333, + "loss": 3.0903, + "step": 600 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.62845555687424e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-600/training_args.bin b/checkpoint-600/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..5424cc46c818b611ae506108fe123bab21b5b22b --- /dev/null +++ b/checkpoint-600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f315e249c575e17de3e6961ca2ae12d127be6daa71a9eab261285cd82649d224 +size 3183 diff --git a/checkpoint-700/config.json b/checkpoint-700/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-700/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-700/optimizer.pt b/checkpoint-700/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..e3949ff1193d09a41994a4036b43ac348de87c95 --- /dev/null +++ b/checkpoint-700/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f275d7034518c5158c172bf96c20ce6fb175f314207223cfb699978149269bbc +size 816635441 diff --git a/checkpoint-700/pytorch_model.bin b/checkpoint-700/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..03df414446f183dd98320e48f20f02fb7d037b8b --- /dev/null +++ b/checkpoint-700/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:955affc689af5aa8a6f44861dbef95820ec8af5e36e8ec14bae1dba53e9e36bf +size 420912233 diff --git a/checkpoint-700/rng_state.pth b/checkpoint-700/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..fd9736451cdc46ee3411741c7216c710c1119e90 --- /dev/null +++ b/checkpoint-700/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21ed3bef256b632948ce5ccc058a761533c36c655fdd1ef63209aa830ee6f2b4 +size 14567 diff --git a/checkpoint-700/scaler.pt b/checkpoint-700/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..12f2b8ec834e54a2bd7cfdd0e07b0c6e125b6490 --- /dev/null +++ b/checkpoint-700/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fb213daf5cce18a5f92167ca14da9df084d907f2b9796efc4666630f312b58c +size 559 diff --git a/checkpoint-700/scheduler.pt b/checkpoint-700/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..42408b6253265af34ae78746144fbba9316e0d7e --- /dev/null +++ b/checkpoint-700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2d4910fd408e002ebeff50d62bfb043dcae5ef658777d0c3ee4a3bbb515ec15 +size 623 diff --git a/checkpoint-700/special_tokens_map.json b/checkpoint-700/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-700/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-700/tokenizer_config.json b/checkpoint-700/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-700/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-700/trainer_state.json b/checkpoint-700/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..94ac5be8a0e238c862bc31c3d02676a1f14234e9 --- /dev/null +++ b/checkpoint-700/trainer_state.json @@ -0,0 +1,106 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 19.013333333333332, + "global_step": 700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.1072, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9379, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9342, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3732, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0714, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9498, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.8436, + "step": 300 + }, + { + "epoch": 9.02, + "learning_rate": 0.00047222222222222224, + "loss": 3.625, + "step": 350 + }, + { + "epoch": 11.0, + "learning_rate": 0.00044444444444444436, + "loss": 3.5012, + "step": 400 + }, + { + "epoch": 12.02, + "learning_rate": 0.00041666666666666664, + "loss": 3.3399, + "step": 450 + }, + { + "epoch": 13.03, + "learning_rate": 0.00038888888888888887, + "loss": 3.2438, + "step": 500 + }, + { + "epoch": 15.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.1962, + "step": 550 + }, + { + "epoch": 16.02, + "learning_rate": 0.0003333333333333333, + "loss": 3.0903, + "step": 600 + }, + { + "epoch": 18.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.0601, + "step": 650 + }, + { + "epoch": 19.01, + "learning_rate": 0.0002777777777777778, + "loss": 2.9726, + "step": 700 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.90034559369216e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-700/training_args.bin b/checkpoint-700/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..5424cc46c818b611ae506108fe123bab21b5b22b --- /dev/null +++ b/checkpoint-700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f315e249c575e17de3e6961ca2ae12d127be6daa71a9eab261285cd82649d224 +size 3183 diff --git a/checkpoint-800/config.json b/checkpoint-800/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-800/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-800/optimizer.pt b/checkpoint-800/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..a7b85aec58d93ab598025bcc926ba715ec55f7de --- /dev/null +++ b/checkpoint-800/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94430ebcd68255be6cd40ab8b07304a555a13d45c251f6a678273dd89fcb724b +size 816635441 diff --git a/checkpoint-800/pytorch_model.bin b/checkpoint-800/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..74df7014c56df3681f4856aa295ac59c13090622 --- /dev/null +++ b/checkpoint-800/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1559a756ba6aaafc1e7d4bec797492158a631c7a85be6be1ee5098f6e41b578a +size 420912233 diff --git a/checkpoint-800/rng_state.pth b/checkpoint-800/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..5744e0fe0089d384c6bcc20904bb498331a4dfe7 --- /dev/null +++ b/checkpoint-800/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:751a6a5b8abea297b0f7dd52f5b56f68d8a434e77478aa0d9a96f3bdce785c86 +size 14567 diff --git a/checkpoint-800/scaler.pt b/checkpoint-800/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..b3c73372264156b02df8dada2192ee3c96dd5fc4 --- /dev/null +++ b/checkpoint-800/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c2074cdcefbaa0a39f736d6b0f7bf018c350d49e85648bc8accc4f756ad816e +size 559 diff --git a/checkpoint-800/scheduler.pt b/checkpoint-800/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..75713dbce3771306ca00343ecc497c4f19a01d03 --- /dev/null +++ b/checkpoint-800/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b27fb255c84833fb6ab5d93679cb236a569de9a1c4f805f72a2f60a2bc7c7499 +size 623 diff --git a/checkpoint-800/special_tokens_map.json b/checkpoint-800/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-800/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-800/tokenizer_config.json b/checkpoint-800/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-800/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-800/trainer_state.json b/checkpoint-800/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..d38cc8575b01d233f10f41a60205fb8c0aeda104 --- /dev/null +++ b/checkpoint-800/trainer_state.json @@ -0,0 +1,118 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 22.006666666666668, + "global_step": 800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.1072, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9379, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9342, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3732, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0714, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9498, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.8436, + "step": 300 + }, + { + "epoch": 9.02, + "learning_rate": 0.00047222222222222224, + "loss": 3.625, + "step": 350 + }, + { + "epoch": 11.0, + "learning_rate": 0.00044444444444444436, + "loss": 3.5012, + "step": 400 + }, + { + "epoch": 12.02, + "learning_rate": 0.00041666666666666664, + "loss": 3.3399, + "step": 450 + }, + { + "epoch": 13.03, + "learning_rate": 0.00038888888888888887, + "loss": 3.2438, + "step": 500 + }, + { + "epoch": 15.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.1962, + "step": 550 + }, + { + "epoch": 16.02, + "learning_rate": 0.0003333333333333333, + "loss": 3.0903, + "step": 600 + }, + { + "epoch": 18.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.0601, + "step": 650 + }, + { + "epoch": 19.01, + "learning_rate": 0.0002777777777777778, + "loss": 2.9726, + "step": 700 + }, + { + "epoch": 20.02, + "learning_rate": 0.00025, + "loss": 2.9202, + "step": 750 + }, + { + "epoch": 22.01, + "learning_rate": 0.00022222222222222218, + "loss": 2.9065, + "step": 800 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.17223563051008e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-800/training_args.bin b/checkpoint-800/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..5424cc46c818b611ae506108fe123bab21b5b22b --- /dev/null +++ b/checkpoint-800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f315e249c575e17de3e6961ca2ae12d127be6daa71a9eab261285cd82649d224 +size 3183 diff --git a/checkpoint-900/config.json b/checkpoint-900/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-900/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-900/optimizer.pt b/checkpoint-900/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..ad1cbb9cc92a947c81403f8c236585455132b308 --- /dev/null +++ b/checkpoint-900/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9e37f81925a2073634be256432b5b0b2f6a49a5bca488b04d893afb69807da3 +size 816635441 diff --git a/checkpoint-900/pytorch_model.bin b/checkpoint-900/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..ae1dd97bcef3911bec9a2eb686b6baf9f4a97329 --- /dev/null +++ b/checkpoint-900/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36c9a219cfdde876c0ca4932986e89ccb73b7c1b30beabfb391e7af3c18f6f39 +size 420912233 diff --git a/checkpoint-900/rng_state.pth b/checkpoint-900/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..efecede3341de29228114b46596ec0ffe46e4bb1 --- /dev/null +++ b/checkpoint-900/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d7173c16cd386f771335b8d372d438ec3e52c2f826a648cbb1c6ecfc2b42dc9 +size 14567 diff --git a/checkpoint-900/scaler.pt b/checkpoint-900/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..be54cb13c777bc6feccb478ff218e7e21fad482a --- /dev/null +++ b/checkpoint-900/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8695f57df923e22b943b0b0f2b9cc7007008e80b53ccee275b3a35963fe67e9 +size 559 diff --git a/checkpoint-900/scheduler.pt b/checkpoint-900/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..539d7c83ea252818dc9cbffac08cf340bb05a454 --- /dev/null +++ b/checkpoint-900/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2cb09e5db72772a15094286e93cbb61d745d9b63863703cf53da0bcb9827821 +size 623 diff --git a/checkpoint-900/special_tokens_map.json b/checkpoint-900/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-900/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-900/tokenizer_config.json b/checkpoint-900/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-900/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-900/trainer_state.json b/checkpoint-900/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..5328221fc64c84bbf3bd78ce67efd2b96ac6751b --- /dev/null +++ b/checkpoint-900/trainer_state.json @@ -0,0 +1,130 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 24.03, + "global_step": 900, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.1072, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9379, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9342, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3732, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0714, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9498, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.8436, + "step": 300 + }, + { + "epoch": 9.02, + "learning_rate": 0.00047222222222222224, + "loss": 3.625, + "step": 350 + }, + { + "epoch": 11.0, + "learning_rate": 0.00044444444444444436, + "loss": 3.5012, + "step": 400 + }, + { + "epoch": 12.02, + "learning_rate": 0.00041666666666666664, + "loss": 3.3399, + "step": 450 + }, + { + "epoch": 13.03, + "learning_rate": 0.00038888888888888887, + "loss": 3.2438, + "step": 500 + }, + { + "epoch": 15.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.1962, + "step": 550 + }, + { + "epoch": 16.02, + "learning_rate": 0.0003333333333333333, + "loss": 3.0903, + "step": 600 + }, + { + "epoch": 18.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.0601, + "step": 650 + }, + { + "epoch": 19.01, + "learning_rate": 0.0002777777777777778, + "loss": 2.9726, + "step": 700 + }, + { + "epoch": 20.02, + "learning_rate": 0.00025, + "loss": 2.9202, + "step": 750 + }, + { + "epoch": 22.01, + "learning_rate": 0.00022222222222222218, + "loss": 2.9065, + "step": 800 + }, + { + "epoch": 23.02, + "learning_rate": 0.00019444444444444443, + "loss": 2.8338, + "step": 850 + }, + { + "epoch": 24.03, + "learning_rate": 0.00016666666666666666, + "loss": 2.7941, + "step": 900 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.44268333531136e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-900/training_args.bin b/checkpoint-900/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..5424cc46c818b611ae506108fe123bab21b5b22b --- /dev/null +++ b/checkpoint-900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f315e249c575e17de3e6961ca2ae12d127be6daa71a9eab261285cd82649d224 +size 3183 diff --git a/config.json b/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/metrics.json b/metrics.json new file mode 100755 index 0000000000000000000000000000000000000000..1a7a45e3e3afe3250c100f99e9900224bf74a52a --- /dev/null +++ b/metrics.json @@ -0,0 +1,2494 @@ +{"num_parameters": 102068736, "trainable_parameters": 102068736, "step": 0} +{"train_info/time_between_train_steps": 2.7201790809631348, "step": 0} +{"info/global_step": 1, "train_info/time_within_train_step": 36.078070402145386, "step": 1} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 17276.0732421875, "train_info/memory_reserved": 18442.0, "train_info/memory_max_reserved": 18442.0, "_timestamp": 1740925555, "_runtime": 61}, "step": 1} +{"logs": {"train/loss": 10.1072, "train/learning_rate": 4.9999999999999996e-06, "train/epoch": 0.0, "_timestamp": 1740925555, "_runtime": 61}, "step": 1} +{"train_info/time_between_train_steps": 0.10081982612609863, "step": 1} +{"info/global_step": 2, "train_info/time_within_train_step": 26.543968200683594, "step": 2} +{"train_info/time_between_train_steps": 0.0041925907135009766, "step": 2} +{"info/global_step": 3, "train_info/time_within_train_step": 26.430774927139282, "step": 3} +{"train_info/time_between_train_steps": 0.0041887760162353516, "step": 3} +{"info/global_step": 4, "train_info/time_within_train_step": 26.410144805908203, "step": 4} +{"train_info/time_between_train_steps": 0.004082918167114258, "step": 4} +{"info/global_step": 5, "train_info/time_within_train_step": 26.27102565765381, "step": 5} +{"train_info/time_between_train_steps": 0.004061222076416016, "step": 5} +{"info/global_step": 6, "train_info/time_within_train_step": 26.437849760055542, "step": 6} +{"train_info/time_between_train_steps": 0.004146575927734375, "step": 6} +{"info/global_step": 7, "train_info/time_within_train_step": 26.269134521484375, "step": 7} +{"train_info/time_between_train_steps": 0.004118919372558594, "step": 7} +{"info/global_step": 8, "train_info/time_within_train_step": 26.430882692337036, "step": 8} +{"train_info/time_between_train_steps": 0.003992319107055664, "step": 8} +{"info/global_step": 9, "train_info/time_within_train_step": 26.297466039657593, "step": 9} +{"train_info/time_between_train_steps": 0.0040323734283447266, "step": 9} +{"info/global_step": 10, "train_info/time_within_train_step": 26.433813333511353, "step": 10} +{"train_info/time_between_train_steps": 0.003968238830566406, "step": 10} +{"info/global_step": 11, "train_info/time_within_train_step": 26.317994594573975, "step": 11} +{"train_info/time_between_train_steps": 0.004456758499145508, "step": 11} +{"info/global_step": 12, "train_info/time_within_train_step": 26.510009765625, "step": 12} +{"train_info/time_between_train_steps": 0.004558563232421875, "step": 12} +{"info/global_step": 13, "train_info/time_within_train_step": 26.370148181915283, "step": 13} +{"train_info/time_between_train_steps": 0.004351377487182617, "step": 13} +{"info/global_step": 14, "train_info/time_within_train_step": 26.45802354812622, "step": 14} +{"train_info/time_between_train_steps": 0.004554033279418945, "step": 14} +{"info/global_step": 15, "train_info/time_within_train_step": 26.365854501724243, "step": 15} +{"train_info/time_between_train_steps": 0.004113674163818359, "step": 15} +{"info/global_step": 16, "train_info/time_within_train_step": 26.504144430160522, "step": 16} +{"train_info/time_between_train_steps": 0.004409313201904297, "step": 16} +{"info/global_step": 17, "train_info/time_within_train_step": 26.35249090194702, "step": 17} +{"train_info/time_between_train_steps": 0.02426767349243164, "step": 17} +{"info/global_step": 18, "train_info/time_within_train_step": 26.332366943359375, "step": 18} +{"train_info/time_between_train_steps": 0.004009246826171875, "step": 18} +{"info/global_step": 19, "train_info/time_within_train_step": 26.34012484550476, "step": 19} +{"train_info/time_between_train_steps": 0.004107952117919922, "step": 19} +{"info/global_step": 20, "train_info/time_within_train_step": 26.3377423286438, "step": 20} +{"train_info/time_between_train_steps": 0.004028797149658203, "step": 20} +{"info/global_step": 21, "train_info/time_within_train_step": 26.304044008255005, "step": 21} +{"train_info/time_between_train_steps": 0.004029512405395508, "step": 21} +{"info/global_step": 22, "train_info/time_within_train_step": 26.374577045440674, "step": 22} +{"train_info/time_between_train_steps": 0.003919839859008789, "step": 22} +{"info/global_step": 23, "train_info/time_within_train_step": 26.302592754364014, "step": 23} +{"train_info/time_between_train_steps": 0.0039980411529541016, "step": 23} +{"info/global_step": 24, "train_info/time_within_train_step": 26.340314388275146, "step": 24} +{"train_info/time_between_train_steps": 0.004092216491699219, "step": 24} +{"info/global_step": 25, "train_info/time_within_train_step": 26.330433130264282, "step": 25} +{"train_info/time_between_train_steps": 0.00415492057800293, "step": 25} +{"info/global_step": 26, "train_info/time_within_train_step": 26.3392436504364, "step": 26} +{"train_info/time_between_train_steps": 0.003981351852416992, "step": 26} +{"info/global_step": 27, "train_info/time_within_train_step": 26.3268985748291, "step": 27} +{"train_info/time_between_train_steps": 0.0040435791015625, "step": 27} +{"info/global_step": 28, "train_info/time_within_train_step": 26.348384857177734, "step": 28} +{"train_info/time_between_train_steps": 0.004325151443481445, "step": 28} +{"info/global_step": 29, "train_info/time_within_train_step": 26.33461594581604, "step": 29} +{"train_info/time_between_train_steps": 0.004169940948486328, "step": 29} +{"info/global_step": 30, "train_info/time_within_train_step": 26.34842300415039, "step": 30} +{"train_info/time_between_train_steps": 0.0063893795013427734, "step": 30} +{"info/global_step": 31, "train_info/time_within_train_step": 26.438119649887085, "step": 31} +{"train_info/time_between_train_steps": 0.004162788391113281, "step": 31} +{"info/global_step": 32, "train_info/time_within_train_step": 26.40572476387024, "step": 32} +{"train_info/time_between_train_steps": 0.004125118255615234, "step": 32} +{"info/global_step": 33, "train_info/time_within_train_step": 26.35135579109192, "step": 33} +{"train_info/time_between_train_steps": 0.00438237190246582, "step": 33} +{"info/global_step": 34, "train_info/time_within_train_step": 26.353355169296265, "step": 34} +{"train_info/time_between_train_steps": 0.004671812057495117, "step": 34} +{"info/global_step": 35, "train_info/time_within_train_step": 26.373234510421753, "step": 35} +{"train_info/time_between_train_steps": 0.004705667495727539, "step": 35} +{"info/global_step": 36, "train_info/time_within_train_step": 26.392733573913574, "step": 36} +{"train_info/time_between_train_steps": 0.005024433135986328, "step": 36} +{"train_info/time_between_train_steps": 16.43814516067505, "step": 36} +{"info/global_step": 37, "train_info/time_within_train_step": 26.366278886795044, "step": 37} +{"train_info/time_between_train_steps": 0.004442691802978516, "step": 37} +{"info/global_step": 38, "train_info/time_within_train_step": 26.480207920074463, "step": 38} +{"train_info/time_between_train_steps": 0.004122257232666016, "step": 38} +{"info/global_step": 39, "train_info/time_within_train_step": 26.37206721305847, "step": 39} +{"train_info/time_between_train_steps": 0.0044291019439697266, "step": 39} +{"info/global_step": 40, "train_info/time_within_train_step": 26.47957968711853, "step": 40} +{"train_info/time_between_train_steps": 0.004361867904663086, "step": 40} +{"info/global_step": 41, "train_info/time_within_train_step": 26.367872953414917, "step": 41} +{"train_info/time_between_train_steps": 0.004407167434692383, "step": 41} +{"info/global_step": 42, "train_info/time_within_train_step": 26.485936641693115, "step": 42} +{"train_info/time_between_train_steps": 0.004441738128662109, "step": 42} +{"info/global_step": 43, "train_info/time_within_train_step": 26.343380212783813, "step": 43} +{"train_info/time_between_train_steps": 0.004438161849975586, "step": 43} +{"info/global_step": 44, "train_info/time_within_train_step": 26.501582384109497, "step": 44} +{"train_info/time_between_train_steps": 0.0043888092041015625, "step": 44} +{"info/global_step": 45, "train_info/time_within_train_step": 26.353548765182495, "step": 45} +{"train_info/time_between_train_steps": 0.004422664642333984, "step": 45} +{"info/global_step": 46, "train_info/time_within_train_step": 26.48599410057068, "step": 46} +{"train_info/time_between_train_steps": 0.004088163375854492, "step": 46} +{"info/global_step": 47, "train_info/time_within_train_step": 26.42606472969055, "step": 47} +{"train_info/time_between_train_steps": 0.004354715347290039, "step": 47} +{"info/global_step": 48, "train_info/time_within_train_step": 26.49835753440857, "step": 48} +{"train_info/time_between_train_steps": 0.004542350769042969, "step": 48} +{"info/global_step": 49, "train_info/time_within_train_step": 26.36459994316101, "step": 49} +{"train_info/time_between_train_steps": 0.004485368728637695, "step": 49} +{"info/global_step": 50, "train_info/time_within_train_step": 26.463292837142944, "step": 50} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18056.2958984375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740926866, "_runtime": 1372}, "step": 50} +{"logs": {"train/loss": 6.9379, "train/learning_rate": 0.00025, "train/epoch": 1.01, "_timestamp": 1740926866, "_runtime": 1372}, "step": 50} +{"train_info/time_between_train_steps": 0.04745173454284668, "step": 50} +{"info/global_step": 51, "train_info/time_within_train_step": 26.38880944252014, "step": 51} +{"train_info/time_between_train_steps": 0.004324197769165039, "step": 51} +{"info/global_step": 52, "train_info/time_within_train_step": 26.43329644203186, "step": 52} +{"train_info/time_between_train_steps": 0.004659414291381836, "step": 52} +{"info/global_step": 53, "train_info/time_within_train_step": 26.374110221862793, "step": 53} +{"train_info/time_between_train_steps": 0.028535842895507812, "step": 53} +{"info/global_step": 54, "train_info/time_within_train_step": 26.332231283187866, "step": 54} +{"train_info/time_between_train_steps": 0.004174709320068359, "step": 54} +{"info/global_step": 55, "train_info/time_within_train_step": 26.341615438461304, "step": 55} +{"train_info/time_between_train_steps": 0.004187345504760742, "step": 55} +{"info/global_step": 56, "train_info/time_within_train_step": 26.34200930595398, "step": 56} +{"train_info/time_between_train_steps": 0.004129648208618164, "step": 56} +{"info/global_step": 57, "train_info/time_within_train_step": 26.34634518623352, "step": 57} +{"train_info/time_between_train_steps": 0.0041310787200927734, "step": 57} +{"info/global_step": 58, "train_info/time_within_train_step": 26.33476185798645, "step": 58} +{"train_info/time_between_train_steps": 0.004019498825073242, "step": 58} +{"info/global_step": 59, "train_info/time_within_train_step": 26.313700199127197, "step": 59} +{"train_info/time_between_train_steps": 0.005133152008056641, "step": 59} +{"info/global_step": 60, "train_info/time_within_train_step": 26.349793910980225, "step": 60} +{"train_info/time_between_train_steps": 0.0042629241943359375, "step": 60} +{"info/global_step": 61, "train_info/time_within_train_step": 26.328481674194336, "step": 61} +{"train_info/time_between_train_steps": 0.0040318965911865234, "step": 61} +{"info/global_step": 62, "train_info/time_within_train_step": 26.415731191635132, "step": 62} +{"train_info/time_between_train_steps": 0.004010677337646484, "step": 62} +{"info/global_step": 63, "train_info/time_within_train_step": 26.334575653076172, "step": 63} +{"train_info/time_between_train_steps": 0.0038771629333496094, "step": 63} +{"info/global_step": 64, "train_info/time_within_train_step": 26.339236974716187, "step": 64} +{"train_info/time_between_train_steps": 0.003998994827270508, "step": 64} +{"info/global_step": 65, "train_info/time_within_train_step": 26.326740503311157, "step": 65} +{"train_info/time_between_train_steps": 0.00406956672668457, "step": 65} +{"info/global_step": 66, "train_info/time_within_train_step": 26.317657709121704, "step": 66} +{"train_info/time_between_train_steps": 0.004082918167114258, "step": 66} +{"info/global_step": 67, "train_info/time_within_train_step": 26.355371475219727, "step": 67} +{"train_info/time_between_train_steps": 0.004011631011962891, "step": 67} +{"info/global_step": 68, "train_info/time_within_train_step": 26.31469464302063, "step": 68} +{"train_info/time_between_train_steps": 0.004125118255615234, "step": 68} +{"info/global_step": 69, "train_info/time_within_train_step": 26.34096121788025, "step": 69} +{"train_info/time_between_train_steps": 0.004447221755981445, "step": 69} +{"info/global_step": 70, "train_info/time_within_train_step": 26.330858945846558, "step": 70} +{"train_info/time_between_train_steps": 0.004289388656616211, "step": 70} +{"info/global_step": 71, "train_info/time_within_train_step": 26.35350275039673, "step": 71} +{"train_info/time_between_train_steps": 0.004589557647705078, "step": 71} +{"info/global_step": 72, "train_info/time_within_train_step": 26.359294414520264, "step": 72} +{"train_info/time_between_train_steps": 0.0049724578857421875, "step": 72} +{"train_info/time_between_train_steps": 18.39080047607422, "step": 72} +{"info/global_step": 73, "train_info/time_within_train_step": 27.108046293258667, "step": 73} +{"train_info/time_between_train_steps": 0.004320383071899414, "step": 73} +{"info/global_step": 74, "train_info/time_within_train_step": 26.46776556968689, "step": 74} +{"train_info/time_between_train_steps": 0.0040853023529052734, "step": 74} +{"info/global_step": 75, "train_info/time_within_train_step": 26.357723474502563, "step": 75} +{"train_info/time_between_train_steps": 0.004506349563598633, "step": 75} +{"info/global_step": 76, "train_info/time_within_train_step": 26.490022659301758, "step": 76} +{"train_info/time_between_train_steps": 0.0042612552642822266, "step": 76} +{"info/global_step": 77, "train_info/time_within_train_step": 26.359783411026, "step": 77} +{"train_info/time_between_train_steps": 0.004462242126464844, "step": 77} +{"info/global_step": 78, "train_info/time_within_train_step": 26.53709864616394, "step": 78} +{"train_info/time_between_train_steps": 0.004221439361572266, "step": 78} +{"info/global_step": 79, "train_info/time_within_train_step": 26.373738765716553, "step": 79} +{"train_info/time_between_train_steps": 0.006192684173583984, "step": 79} +{"info/global_step": 80, "train_info/time_within_train_step": 26.4952552318573, "step": 80} +{"train_info/time_between_train_steps": 0.004598379135131836, "step": 80} +{"info/global_step": 81, "train_info/time_within_train_step": 26.33162832260132, "step": 81} +{"train_info/time_between_train_steps": 0.00455021858215332, "step": 81} +{"info/global_step": 82, "train_info/time_within_train_step": 26.47154450416565, "step": 82} +{"train_info/time_between_train_steps": 0.0044002532958984375, "step": 82} +{"info/global_step": 83, "train_info/time_within_train_step": 26.36334490776062, "step": 83} +{"train_info/time_between_train_steps": 0.0045223236083984375, "step": 83} +{"info/global_step": 84, "train_info/time_within_train_step": 26.477413177490234, "step": 84} +{"train_info/time_between_train_steps": 0.004677295684814453, "step": 84} +{"info/global_step": 85, "train_info/time_within_train_step": 26.362709283828735, "step": 85} +{"train_info/time_between_train_steps": 0.004614114761352539, "step": 85} +{"info/global_step": 86, "train_info/time_within_train_step": 26.482940673828125, "step": 86} +{"train_info/time_between_train_steps": 0.0044553279876708984, "step": 86} +{"info/global_step": 87, "train_info/time_within_train_step": 26.361051559448242, "step": 87} +{"train_info/time_between_train_steps": 0.0043888092041015625, "step": 87} +{"info/global_step": 88, "train_info/time_within_train_step": 26.42983627319336, "step": 88} +{"train_info/time_between_train_steps": 0.0045049190521240234, "step": 88} +{"info/global_step": 89, "train_info/time_within_train_step": 26.400935411453247, "step": 89} +{"train_info/time_between_train_steps": 0.028939008712768555, "step": 89} +{"info/global_step": 90, "train_info/time_within_train_step": 26.306976079940796, "step": 90} +{"train_info/time_between_train_steps": 0.004073143005371094, "step": 90} +{"info/global_step": 91, "train_info/time_within_train_step": 26.344279289245605, "step": 91} +{"train_info/time_between_train_steps": 0.004068851470947266, "step": 91} +{"info/global_step": 92, "train_info/time_within_train_step": 26.342113494873047, "step": 92} +{"train_info/time_between_train_steps": 0.004167079925537109, "step": 92} +{"info/global_step": 93, "train_info/time_within_train_step": 26.41693902015686, "step": 93} +{"train_info/time_between_train_steps": 0.0041959285736083984, "step": 93} +{"info/global_step": 94, "train_info/time_within_train_step": 26.332449197769165, "step": 94} +{"train_info/time_between_train_steps": 0.004094600677490234, "step": 94} +{"info/global_step": 95, "train_info/time_within_train_step": 26.35115098953247, "step": 95} +{"train_info/time_between_train_steps": 0.004235744476318359, "step": 95} +{"info/global_step": 96, "train_info/time_within_train_step": 26.337095975875854, "step": 96} +{"train_info/time_between_train_steps": 0.004163026809692383, "step": 96} +{"info/global_step": 97, "train_info/time_within_train_step": 26.32129192352295, "step": 97} +{"train_info/time_between_train_steps": 0.0042417049407958984, "step": 97} +{"info/global_step": 98, "train_info/time_within_train_step": 26.350653886795044, "step": 98} +{"train_info/time_between_train_steps": 0.004162311553955078, "step": 98} +{"info/global_step": 99, "train_info/time_within_train_step": 26.343034744262695, "step": 99} +{"train_info/time_between_train_steps": 0.00416874885559082, "step": 99} +{"info/global_step": 100, "train_info/time_within_train_step": 26.351009130477905, "step": 100} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18056.2958984375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740928205, "_runtime": 2711}, "step": 100} +{"logs": {"train/loss": 4.9342, "train/learning_rate": 0.0005, "train/epoch": 2.02, "_timestamp": 1740928205, "_runtime": 2711}, "step": 100} +{"train_info/time_between_train_steps": 30.261109352111816, "step": 100} +{"info/global_step": 101, "train_info/time_within_train_step": 26.471405029296875, "step": 101} +{"train_info/time_between_train_steps": 0.0040760040283203125, "step": 101} +{"info/global_step": 102, "train_info/time_within_train_step": 26.390767335891724, "step": 102} +{"train_info/time_between_train_steps": 0.004238605499267578, "step": 102} +{"info/global_step": 103, "train_info/time_within_train_step": 26.353754997253418, "step": 103} +{"train_info/time_between_train_steps": 0.004216432571411133, "step": 103} +{"info/global_step": 104, "train_info/time_within_train_step": 26.34708595275879, "step": 104} +{"train_info/time_between_train_steps": 0.004277944564819336, "step": 104} +{"info/global_step": 105, "train_info/time_within_train_step": 26.325509786605835, "step": 105} +{"train_info/time_between_train_steps": 0.004515171051025391, "step": 105} +{"info/global_step": 106, "train_info/time_within_train_step": 26.359622716903687, "step": 106} +{"train_info/time_between_train_steps": 0.004517793655395508, "step": 106} +{"info/global_step": 107, "train_info/time_within_train_step": 26.37014889717102, "step": 107} +{"train_info/time_between_train_steps": 0.005090951919555664, "step": 107} +{"info/global_step": 108, "train_info/time_within_train_step": 26.39812684059143, "step": 108} +{"train_info/time_between_train_steps": 0.004962444305419922, "step": 108} +{"train_info/time_between_train_steps": 16.605083465576172, "step": 108} +{"info/global_step": 109, "train_info/time_within_train_step": 26.32278823852539, "step": 109} +{"train_info/time_between_train_steps": 0.004485368728637695, "step": 109} +{"info/global_step": 110, "train_info/time_within_train_step": 26.48135781288147, "step": 110} +{"train_info/time_between_train_steps": 0.004150867462158203, "step": 110} +{"info/global_step": 111, "train_info/time_within_train_step": 26.36077308654785, "step": 111} +{"train_info/time_between_train_steps": 0.0043714046478271484, "step": 111} +{"info/global_step": 112, "train_info/time_within_train_step": 26.483140230178833, "step": 112} +{"train_info/time_between_train_steps": 0.0041656494140625, "step": 112} +{"info/global_step": 113, "train_info/time_within_train_step": 26.350770473480225, "step": 113} +{"train_info/time_between_train_steps": 0.004609823226928711, "step": 113} +{"info/global_step": 114, "train_info/time_within_train_step": 26.524726152420044, "step": 114} +{"train_info/time_between_train_steps": 0.004258871078491211, "step": 114} +{"info/global_step": 115, "train_info/time_within_train_step": 26.35198163986206, "step": 115} +{"train_info/time_between_train_steps": 0.004427433013916016, "step": 115} +{"info/global_step": 116, "train_info/time_within_train_step": 26.49274468421936, "step": 116} +{"train_info/time_between_train_steps": 0.004462242126464844, "step": 116} +{"info/global_step": 117, "train_info/time_within_train_step": 26.359429836273193, "step": 117} +{"train_info/time_between_train_steps": 0.0044155120849609375, "step": 117} +{"info/global_step": 118, "train_info/time_within_train_step": 26.642404794692993, "step": 118} +{"train_info/time_between_train_steps": 0.005461215972900391, "step": 118} +{"info/global_step": 119, "train_info/time_within_train_step": 26.398263454437256, "step": 119} +{"train_info/time_between_train_steps": 0.005452871322631836, "step": 119} +{"info/global_step": 120, "train_info/time_within_train_step": 26.518198013305664, "step": 120} +{"train_info/time_between_train_steps": 0.005105495452880859, "step": 120} +{"info/global_step": 121, "train_info/time_within_train_step": 26.364461421966553, "step": 121} +{"train_info/time_between_train_steps": 0.00538325309753418, "step": 121} +{"info/global_step": 122, "train_info/time_within_train_step": 26.494805097579956, "step": 122} +{"train_info/time_between_train_steps": 0.0048291683197021484, "step": 122} +{"info/global_step": 123, "train_info/time_within_train_step": 26.379379510879517, "step": 123} +{"train_info/time_between_train_steps": 0.004645586013793945, "step": 123} +{"info/global_step": 124, "train_info/time_within_train_step": 26.533055543899536, "step": 124} +{"train_info/time_between_train_steps": 0.004955768585205078, "step": 124} +{"info/global_step": 125, "train_info/time_within_train_step": 26.34773588180542, "step": 125} +{"train_info/time_between_train_steps": 0.02827739715576172, "step": 125} +{"info/global_step": 126, "train_info/time_within_train_step": 26.37736749649048, "step": 126} +{"train_info/time_between_train_steps": 0.004349231719970703, "step": 126} +{"info/global_step": 127, "train_info/time_within_train_step": 26.322616815567017, "step": 127} +{"train_info/time_between_train_steps": 0.004630565643310547, "step": 127} +{"info/global_step": 128, "train_info/time_within_train_step": 26.354130268096924, "step": 128} +{"train_info/time_between_train_steps": 0.004535675048828125, "step": 128} +{"info/global_step": 129, "train_info/time_within_train_step": 26.339648962020874, "step": 129} +{"train_info/time_between_train_steps": 0.004688262939453125, "step": 129} +{"info/global_step": 130, "train_info/time_within_train_step": 26.35568070411682, "step": 130} +{"train_info/time_between_train_steps": 0.004512786865234375, "step": 130} +{"info/global_step": 131, "train_info/time_within_train_step": 26.328452587127686, "step": 131} +{"train_info/time_between_train_steps": 0.004651784896850586, "step": 131} +{"info/global_step": 132, "train_info/time_within_train_step": 26.33880376815796, "step": 132} +{"train_info/time_between_train_steps": 0.004783153533935547, "step": 132} +{"info/global_step": 133, "train_info/time_within_train_step": 26.31388211250305, "step": 133} +{"train_info/time_between_train_steps": 0.004168272018432617, "step": 133} +{"info/global_step": 134, "train_info/time_within_train_step": 26.293895721435547, "step": 134} +{"train_info/time_between_train_steps": 0.004180431365966797, "step": 134} +{"info/global_step": 135, "train_info/time_within_train_step": 26.317638874053955, "step": 135} +{"train_info/time_between_train_steps": 0.0043010711669921875, "step": 135} +{"info/global_step": 136, "train_info/time_within_train_step": 26.314316987991333, "step": 136} +{"train_info/time_between_train_steps": 0.004183292388916016, "step": 136} +{"info/global_step": 137, "train_info/time_within_train_step": 26.31457233428955, "step": 137} +{"train_info/time_between_train_steps": 0.004340171813964844, "step": 137} +{"info/global_step": 138, "train_info/time_within_train_step": 26.302738904953003, "step": 138} +{"train_info/time_between_train_steps": 0.004120349884033203, "step": 138} +{"info/global_step": 139, "train_info/time_within_train_step": 26.381237983703613, "step": 139} +{"train_info/time_between_train_steps": 0.004179954528808594, "step": 139} +{"info/global_step": 140, "train_info/time_within_train_step": 26.306252002716064, "step": 140} +{"train_info/time_between_train_steps": 0.004263639450073242, "step": 140} +{"info/global_step": 141, "train_info/time_within_train_step": 26.292351722717285, "step": 141} +{"train_info/time_between_train_steps": 0.0043904781341552734, "step": 141} +{"info/global_step": 142, "train_info/time_within_train_step": 26.35942530632019, "step": 142} +{"train_info/time_between_train_steps": 0.004675149917602539, "step": 142} +{"info/global_step": 143, "train_info/time_within_train_step": 26.324097156524658, "step": 143} +{"train_info/time_between_train_steps": 0.005058765411376953, "step": 143} +{"info/global_step": 144, "train_info/time_within_train_step": 26.41270685195923, "step": 144} +{"train_info/time_between_train_steps": 0.0057108402252197266, "step": 144} +{"train_info/time_between_train_steps": 16.6528160572052, "step": 144} +{"info/global_step": 145, "train_info/time_within_train_step": 26.315715074539185, "step": 145} +{"train_info/time_between_train_steps": 0.0040013790130615234, "step": 145} +{"info/global_step": 146, "train_info/time_within_train_step": 26.404312133789062, "step": 146} +{"train_info/time_between_train_steps": 0.0041501522064208984, "step": 146} +{"info/global_step": 147, "train_info/time_within_train_step": 26.27957510948181, "step": 147} +{"train_info/time_between_train_steps": 0.004111051559448242, "step": 147} +{"info/global_step": 148, "train_info/time_within_train_step": 26.400957822799683, "step": 148} +{"train_info/time_between_train_steps": 0.0040361881256103516, "step": 148} +{"info/global_step": 149, "train_info/time_within_train_step": 26.31938099861145, "step": 149} +{"train_info/time_between_train_steps": 0.0040760040283203125, "step": 149} +{"info/global_step": 150, "train_info/time_within_train_step": 26.440757036209106, "step": 150} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18056.2958984375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740929589, "_runtime": 4095}, "step": 150} +{"logs": {"train/loss": 4.3732, "train/learning_rate": 0.0005833333333333333, "train/epoch": 4.0, "_timestamp": 1740929589, "_runtime": 4095}, "step": 150} +{"train_info/time_between_train_steps": 0.025029897689819336, "step": 150} +{"info/global_step": 151, "train_info/time_within_train_step": 26.304205656051636, "step": 151} +{"train_info/time_between_train_steps": 0.0041081905364990234, "step": 151} +{"info/global_step": 152, "train_info/time_within_train_step": 26.41195273399353, "step": 152} +{"train_info/time_between_train_steps": 0.003984689712524414, "step": 152} +{"info/global_step": 153, "train_info/time_within_train_step": 26.306722402572632, "step": 153} +{"train_info/time_between_train_steps": 0.004096269607543945, "step": 153} +{"info/global_step": 154, "train_info/time_within_train_step": 26.39926505088806, "step": 154} +{"train_info/time_between_train_steps": 0.0039844512939453125, "step": 154} +{"info/global_step": 155, "train_info/time_within_train_step": 26.401583194732666, "step": 155} +{"train_info/time_between_train_steps": 0.004160165786743164, "step": 155} +{"info/global_step": 156, "train_info/time_within_train_step": 26.42385721206665, "step": 156} +{"train_info/time_between_train_steps": 0.004012346267700195, "step": 156} +{"info/global_step": 157, "train_info/time_within_train_step": 26.324392318725586, "step": 157} +{"train_info/time_between_train_steps": 0.004189252853393555, "step": 157} +{"info/global_step": 158, "train_info/time_within_train_step": 26.406030654907227, "step": 158} +{"train_info/time_between_train_steps": 0.003995656967163086, "step": 158} +{"info/global_step": 159, "train_info/time_within_train_step": 26.32502770423889, "step": 159} +{"train_info/time_between_train_steps": 0.004044055938720703, "step": 159} +{"info/global_step": 160, "train_info/time_within_train_step": 26.400731801986694, "step": 160} +{"train_info/time_between_train_steps": 0.004169464111328125, "step": 160} +{"info/global_step": 161, "train_info/time_within_train_step": 26.5943706035614, "step": 161} +{"train_info/time_between_train_steps": 0.017464876174926758, "step": 161} +{"info/global_step": 162, "train_info/time_within_train_step": 26.789079904556274, "step": 162} +{"train_info/time_between_train_steps": 0.004030466079711914, "step": 162} +{"info/global_step": 163, "train_info/time_within_train_step": 26.493507385253906, "step": 163} +{"train_info/time_between_train_steps": 0.0038385391235351562, "step": 163} +{"info/global_step": 164, "train_info/time_within_train_step": 26.2901349067688, "step": 164} +{"train_info/time_between_train_steps": 0.003976345062255859, "step": 164} +{"info/global_step": 165, "train_info/time_within_train_step": 26.293254375457764, "step": 165} +{"train_info/time_between_train_steps": 0.003938198089599609, "step": 165} +{"info/global_step": 166, "train_info/time_within_train_step": 26.311196088790894, "step": 166} +{"train_info/time_between_train_steps": 0.0038509368896484375, "step": 166} +{"info/global_step": 167, "train_info/time_within_train_step": 26.282419204711914, "step": 167} +{"train_info/time_between_train_steps": 0.0038652420043945312, "step": 167} +{"info/global_step": 168, "train_info/time_within_train_step": 26.30770206451416, "step": 168} +{"train_info/time_between_train_steps": 0.004014492034912109, "step": 168} +{"info/global_step": 169, "train_info/time_within_train_step": 26.287160873413086, "step": 169} +{"train_info/time_between_train_steps": 0.003973245620727539, "step": 169} +{"info/global_step": 170, "train_info/time_within_train_step": 26.367019414901733, "step": 170} +{"train_info/time_between_train_steps": 0.0039196014404296875, "step": 170} +{"info/global_step": 171, "train_info/time_within_train_step": 26.28819751739502, "step": 171} +{"train_info/time_between_train_steps": 0.003981828689575195, "step": 171} +{"info/global_step": 172, "train_info/time_within_train_step": 26.269626140594482, "step": 172} +{"train_info/time_between_train_steps": 0.003964424133300781, "step": 172} +{"info/global_step": 173, "train_info/time_within_train_step": 26.3115975856781, "step": 173} +{"train_info/time_between_train_steps": 0.003939628601074219, "step": 173} +{"info/global_step": 174, "train_info/time_within_train_step": 26.302815198898315, "step": 174} +{"train_info/time_between_train_steps": 0.003977775573730469, "step": 174} +{"info/global_step": 175, "train_info/time_within_train_step": 26.322418689727783, "step": 175} +{"train_info/time_between_train_steps": 0.0038580894470214844, "step": 175} +{"info/global_step": 176, "train_info/time_within_train_step": 26.300801515579224, "step": 176} +{"train_info/time_between_train_steps": 0.0040247440338134766, "step": 176} +{"info/global_step": 177, "train_info/time_within_train_step": 26.331734895706177, "step": 177} +{"train_info/time_between_train_steps": 0.0041387081146240234, "step": 177} +{"info/global_step": 178, "train_info/time_within_train_step": 26.307843923568726, "step": 178} +{"train_info/time_between_train_steps": 0.004376888275146484, "step": 178} +{"info/global_step": 179, "train_info/time_within_train_step": 26.29014015197754, "step": 179} +{"train_info/time_between_train_steps": 0.004922628402709961, "step": 179} +{"info/global_step": 180, "train_info/time_within_train_step": 26.358100414276123, "step": 180} +{"train_info/time_between_train_steps": 0.004937171936035156, "step": 180} +{"train_info/time_between_train_steps": 16.58294439315796, "step": 180} +{"info/global_step": 181, "train_info/time_within_train_step": 26.288187742233276, "step": 181} +{"train_info/time_between_train_steps": 0.003782033920288086, "step": 181} +{"info/global_step": 182, "train_info/time_within_train_step": 26.386529684066772, "step": 182} +{"train_info/time_between_train_steps": 0.0039272308349609375, "step": 182} +{"info/global_step": 183, "train_info/time_within_train_step": 26.268930196762085, "step": 183} +{"train_info/time_between_train_steps": 0.0040187835693359375, "step": 183} +{"info/global_step": 184, "train_info/time_within_train_step": 26.41179370880127, "step": 184} +{"train_info/time_between_train_steps": 0.003973484039306641, "step": 184} +{"info/global_step": 185, "train_info/time_within_train_step": 26.332879066467285, "step": 185} +{"train_info/time_between_train_steps": 0.0041391849517822266, "step": 185} +{"info/global_step": 186, "train_info/time_within_train_step": 26.393676042556763, "step": 186} +{"train_info/time_between_train_steps": 0.003864288330078125, "step": 186} +{"info/global_step": 187, "train_info/time_within_train_step": 26.289770364761353, "step": 187} +{"train_info/time_between_train_steps": 0.003914833068847656, "step": 187} +{"info/global_step": 188, "train_info/time_within_train_step": 26.411069869995117, "step": 188} +{"train_info/time_between_train_steps": 0.0039827823638916016, "step": 188} +{"info/global_step": 189, "train_info/time_within_train_step": 26.303126573562622, "step": 189} +{"train_info/time_between_train_steps": 0.004032611846923828, "step": 189} +{"info/global_step": 190, "train_info/time_within_train_step": 26.412001132965088, "step": 190} +{"train_info/time_between_train_steps": 0.003945112228393555, "step": 190} +{"info/global_step": 191, "train_info/time_within_train_step": 26.303770303726196, "step": 191} +{"train_info/time_between_train_steps": 0.004016399383544922, "step": 191} +{"info/global_step": 192, "train_info/time_within_train_step": 26.415440559387207, "step": 192} +{"train_info/time_between_train_steps": 0.004328489303588867, "step": 192} +{"info/global_step": 193, "train_info/time_within_train_step": 26.33249592781067, "step": 193} +{"train_info/time_between_train_steps": 0.00422215461730957, "step": 193} +{"info/global_step": 194, "train_info/time_within_train_step": 26.525541305541992, "step": 194} +{"train_info/time_between_train_steps": 0.003900289535522461, "step": 194} +{"info/global_step": 195, "train_info/time_within_train_step": 26.33868098258972, "step": 195} +{"train_info/time_between_train_steps": 0.003780841827392578, "step": 195} +{"info/global_step": 196, "train_info/time_within_train_step": 26.39219856262207, "step": 196} +{"train_info/time_between_train_steps": 0.004229545593261719, "step": 196} +{"info/global_step": 197, "train_info/time_within_train_step": 26.320857524871826, "step": 197} +{"train_info/time_between_train_steps": 0.01637554168701172, "step": 197} +{"info/global_step": 198, "train_info/time_within_train_step": 26.288548469543457, "step": 198} +{"train_info/time_between_train_steps": 0.0039136409759521484, "step": 198} +{"info/global_step": 199, "train_info/time_within_train_step": 26.270843982696533, "step": 199} +{"train_info/time_between_train_steps": 0.003789663314819336, "step": 199} +{"info/global_step": 200, "train_info/time_within_train_step": 26.31706213951111, "step": 200} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18056.2958984375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740930926, "_runtime": 5432}, "step": 200} +{"logs": {"train/loss": 4.0714, "train/learning_rate": 0.0005555555555555556, "train/epoch": 5.02, "_timestamp": 1740930926, "_runtime": 5432}, "step": 200} +{"train_info/time_between_train_steps": 30.606271505355835, "step": 200} +{"info/global_step": 201, "train_info/time_within_train_step": 26.479132890701294, "step": 201} +{"train_info/time_between_train_steps": 0.003945827484130859, "step": 201} +{"info/global_step": 202, "train_info/time_within_train_step": 26.35827875137329, "step": 202} +{"train_info/time_between_train_steps": 0.003915548324584961, "step": 202} +{"info/global_step": 203, "train_info/time_within_train_step": 26.308558702468872, "step": 203} +{"train_info/time_between_train_steps": 0.0038945674896240234, "step": 203} +{"info/global_step": 204, "train_info/time_within_train_step": 26.2979576587677, "step": 204} +{"train_info/time_between_train_steps": 0.004001140594482422, "step": 204} +{"info/global_step": 205, "train_info/time_within_train_step": 26.318878650665283, "step": 205} +{"train_info/time_between_train_steps": 0.0038673877716064453, "step": 205} +{"info/global_step": 206, "train_info/time_within_train_step": 26.29394292831421, "step": 206} +{"train_info/time_between_train_steps": 0.00393223762512207, "step": 206} +{"info/global_step": 207, "train_info/time_within_train_step": 26.32800269126892, "step": 207} +{"train_info/time_between_train_steps": 0.0041658878326416016, "step": 207} +{"info/global_step": 208, "train_info/time_within_train_step": 26.312912225723267, "step": 208} +{"train_info/time_between_train_steps": 0.0039348602294921875, "step": 208} +{"info/global_step": 209, "train_info/time_within_train_step": 26.27583360671997, "step": 209} +{"train_info/time_between_train_steps": 0.0039310455322265625, "step": 209} +{"info/global_step": 210, "train_info/time_within_train_step": 26.316095113754272, "step": 210} +{"train_info/time_between_train_steps": 0.003856182098388672, "step": 210} +{"info/global_step": 211, "train_info/time_within_train_step": 26.306719064712524, "step": 211} +{"train_info/time_between_train_steps": 0.003958225250244141, "step": 211} +{"info/global_step": 212, "train_info/time_within_train_step": 26.330716848373413, "step": 212} +{"train_info/time_between_train_steps": 0.004082918167114258, "step": 212} +{"info/global_step": 213, "train_info/time_within_train_step": 26.29652166366577, "step": 213} +{"train_info/time_between_train_steps": 0.004014253616333008, "step": 213} +{"info/global_step": 214, "train_info/time_within_train_step": 26.32774305343628, "step": 214} +{"train_info/time_between_train_steps": 0.0041391849517822266, "step": 214} +{"info/global_step": 215, "train_info/time_within_train_step": 26.31474804878235, "step": 215} +{"train_info/time_between_train_steps": 0.00465846061706543, "step": 215} +{"info/global_step": 216, "train_info/time_within_train_step": 26.320667028427124, "step": 216} +{"train_info/time_between_train_steps": 0.004918813705444336, "step": 216} +{"train_info/time_between_train_steps": 16.447136640548706, "step": 216} +{"info/global_step": 217, "train_info/time_within_train_step": 26.276061534881592, "step": 217} +{"train_info/time_between_train_steps": 0.003729581832885742, "step": 217} +{"info/global_step": 218, "train_info/time_within_train_step": 26.404484510421753, "step": 218} +{"train_info/time_between_train_steps": 0.003826618194580078, "step": 218} +{"info/global_step": 219, "train_info/time_within_train_step": 26.2910578250885, "step": 219} +{"train_info/time_between_train_steps": 0.0040493011474609375, "step": 219} +{"info/global_step": 220, "train_info/time_within_train_step": 26.393603801727295, "step": 220} +{"train_info/time_between_train_steps": 0.003980875015258789, "step": 220} +{"info/global_step": 221, "train_info/time_within_train_step": 26.33421754837036, "step": 221} +{"train_info/time_between_train_steps": 0.00400853157043457, "step": 221} +{"info/global_step": 222, "train_info/time_within_train_step": 26.347464323043823, "step": 222} +{"train_info/time_between_train_steps": 0.0038750171661376953, "step": 222} +{"info/global_step": 223, "train_info/time_within_train_step": 26.30002212524414, "step": 223} +{"train_info/time_between_train_steps": 0.003962278366088867, "step": 223} +{"info/global_step": 224, "train_info/time_within_train_step": 26.415040254592896, "step": 224} +{"train_info/time_between_train_steps": 0.003951549530029297, "step": 224} +{"info/global_step": 225, "train_info/time_within_train_step": 26.32346820831299, "step": 225} +{"train_info/time_between_train_steps": 0.00410151481628418, "step": 225} +{"info/global_step": 226, "train_info/time_within_train_step": 26.386565923690796, "step": 226} +{"train_info/time_between_train_steps": 0.003914356231689453, "step": 226} +{"info/global_step": 227, "train_info/time_within_train_step": 26.31389045715332, "step": 227} +{"train_info/time_between_train_steps": 0.0039825439453125, "step": 227} +{"info/global_step": 228, "train_info/time_within_train_step": 26.418681621551514, "step": 228} +{"train_info/time_between_train_steps": 0.004021644592285156, "step": 228} +{"info/global_step": 229, "train_info/time_within_train_step": 26.282816886901855, "step": 229} +{"train_info/time_between_train_steps": 0.004395484924316406, "step": 229} +{"info/global_step": 230, "train_info/time_within_train_step": 26.46148157119751, "step": 230} +{"train_info/time_between_train_steps": 0.004383563995361328, "step": 230} +{"info/global_step": 231, "train_info/time_within_train_step": 26.316788911819458, "step": 231} +{"train_info/time_between_train_steps": 0.004282951354980469, "step": 231} +{"info/global_step": 232, "train_info/time_within_train_step": 26.502500534057617, "step": 232} +{"train_info/time_between_train_steps": 0.006208896636962891, "step": 232} +{"info/global_step": 233, "train_info/time_within_train_step": 26.326664447784424, "step": 233} +{"train_info/time_between_train_steps": 0.029100656509399414, "step": 233} +{"info/global_step": 234, "train_info/time_within_train_step": 26.333056211471558, "step": 234} +{"train_info/time_between_train_steps": 0.0041179656982421875, "step": 234} +{"info/global_step": 235, "train_info/time_within_train_step": 26.310975313186646, "step": 235} +{"train_info/time_between_train_steps": 0.003983020782470703, "step": 235} +{"info/global_step": 236, "train_info/time_within_train_step": 26.296810150146484, "step": 236} +{"train_info/time_between_train_steps": 0.00388336181640625, "step": 236} +{"info/global_step": 237, "train_info/time_within_train_step": 26.34024715423584, "step": 237} +{"train_info/time_between_train_steps": 0.003961801528930664, "step": 237} +{"info/global_step": 238, "train_info/time_within_train_step": 26.266175031661987, "step": 238} +{"train_info/time_between_train_steps": 0.004000425338745117, "step": 238} +{"info/global_step": 239, "train_info/time_within_train_step": 26.321442365646362, "step": 239} +{"train_info/time_between_train_steps": 0.0039520263671875, "step": 239} +{"info/global_step": 240, "train_info/time_within_train_step": 26.29810380935669, "step": 240} +{"train_info/time_between_train_steps": 0.004075288772583008, "step": 240} +{"info/global_step": 241, "train_info/time_within_train_step": 26.32348108291626, "step": 241} +{"train_info/time_between_train_steps": 0.004060029983520508, "step": 241} +{"info/global_step": 242, "train_info/time_within_train_step": 26.302371978759766, "step": 242} +{"train_info/time_between_train_steps": 0.00394749641418457, "step": 242} +{"info/global_step": 243, "train_info/time_within_train_step": 26.321587085723877, "step": 243} +{"train_info/time_between_train_steps": 0.003937482833862305, "step": 243} +{"info/global_step": 244, "train_info/time_within_train_step": 26.30548620223999, "step": 244} +{"train_info/time_between_train_steps": 0.003989458084106445, "step": 244} +{"info/global_step": 245, "train_info/time_within_train_step": 26.276754140853882, "step": 245} +{"train_info/time_between_train_steps": 0.0038938522338867188, "step": 245} +{"info/global_step": 246, "train_info/time_within_train_step": 26.31089496612549, "step": 246} +{"train_info/time_between_train_steps": 0.004081249237060547, "step": 246} +{"info/global_step": 247, "train_info/time_within_train_step": 26.356810569763184, "step": 247} +{"train_info/time_between_train_steps": 0.0040400028228759766, "step": 247} +{"info/global_step": 248, "train_info/time_within_train_step": 26.320066690444946, "step": 248} +{"train_info/time_between_train_steps": 0.003927469253540039, "step": 248} +{"info/global_step": 249, "train_info/time_within_train_step": 26.29786205291748, "step": 249} +{"train_info/time_between_train_steps": 0.004058837890625, "step": 249} +{"info/global_step": 250, "train_info/time_within_train_step": 26.324134826660156, "step": 250} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18056.2958984375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740932291, "_runtime": 6797}, "step": 250} +{"logs": {"train/loss": 3.9498, "train/learning_rate": 0.0005277777777777777, "train/epoch": 6.03, "_timestamp": 1740932291, "_runtime": 6797}, "step": 250} +{"train_info/time_between_train_steps": 0.048241376876831055, "step": 250} +{"info/global_step": 251, "train_info/time_within_train_step": 26.31901717185974, "step": 251} +{"train_info/time_between_train_steps": 0.004601955413818359, "step": 251} +{"info/global_step": 252, "train_info/time_within_train_step": 27.386241912841797, "step": 252} +{"train_info/time_between_train_steps": 0.0051975250244140625, "step": 252} +{"train_info/time_between_train_steps": 23.656141757965088, "step": 252} +{"info/global_step": 253, "train_info/time_within_train_step": 26.765395402908325, "step": 253} +{"train_info/time_between_train_steps": 0.003969907760620117, "step": 253} +{"info/global_step": 254, "train_info/time_within_train_step": 26.41308283805847, "step": 254} +{"train_info/time_between_train_steps": 0.0039882659912109375, "step": 254} +{"info/global_step": 255, "train_info/time_within_train_step": 26.304632663726807, "step": 255} +{"train_info/time_between_train_steps": 0.003911018371582031, "step": 255} +{"info/global_step": 256, "train_info/time_within_train_step": 26.412975311279297, "step": 256} +{"train_info/time_between_train_steps": 0.004025936126708984, "step": 256} +{"info/global_step": 257, "train_info/time_within_train_step": 26.299643993377686, "step": 257} +{"train_info/time_between_train_steps": 0.004054069519042969, "step": 257} +{"info/global_step": 258, "train_info/time_within_train_step": 26.447442770004272, "step": 258} +{"train_info/time_between_train_steps": 0.004113674163818359, "step": 258} +{"info/global_step": 259, "train_info/time_within_train_step": 26.308027505874634, "step": 259} +{"train_info/time_between_train_steps": 0.003920555114746094, "step": 259} +{"info/global_step": 260, "train_info/time_within_train_step": 26.356308221817017, "step": 260} +{"train_info/time_between_train_steps": 0.003953218460083008, "step": 260} +{"info/global_step": 261, "train_info/time_within_train_step": 26.298173189163208, "step": 261} +{"train_info/time_between_train_steps": 0.004086732864379883, "step": 261} +{"info/global_step": 262, "train_info/time_within_train_step": 26.45974612236023, "step": 262} +{"train_info/time_between_train_steps": 0.004012107849121094, "step": 262} +{"info/global_step": 263, "train_info/time_within_train_step": 26.318018913269043, "step": 263} +{"train_info/time_between_train_steps": 0.004008293151855469, "step": 263} +{"info/global_step": 264, "train_info/time_within_train_step": 26.41434407234192, "step": 264} +{"train_info/time_between_train_steps": 0.004156351089477539, "step": 264} +{"info/global_step": 265, "train_info/time_within_train_step": 26.33522605895996, "step": 265} +{"train_info/time_between_train_steps": 0.004155874252319336, "step": 265} +{"info/global_step": 266, "train_info/time_within_train_step": 26.395615577697754, "step": 266} +{"train_info/time_between_train_steps": 0.003900289535522461, "step": 266} +{"info/global_step": 267, "train_info/time_within_train_step": 26.29697847366333, "step": 267} +{"train_info/time_between_train_steps": 0.004029273986816406, "step": 267} +{"info/global_step": 268, "train_info/time_within_train_step": 26.422763109207153, "step": 268} +{"train_info/time_between_train_steps": 0.003977537155151367, "step": 268} +{"info/global_step": 269, "train_info/time_within_train_step": 26.29027557373047, "step": 269} +{"train_info/time_between_train_steps": 0.015058755874633789, "step": 269} +{"info/global_step": 270, "train_info/time_within_train_step": 26.30606985092163, "step": 270} +{"train_info/time_between_train_steps": 0.0037636756896972656, "step": 270} +{"info/global_step": 271, "train_info/time_within_train_step": 26.305058479309082, "step": 271} +{"train_info/time_between_train_steps": 0.003824472427368164, "step": 271} +{"info/global_step": 272, "train_info/time_within_train_step": 26.326199769973755, "step": 272} +{"train_info/time_between_train_steps": 0.003903627395629883, "step": 272} +{"info/global_step": 273, "train_info/time_within_train_step": 26.2965407371521, "step": 273} +{"train_info/time_between_train_steps": 0.004001140594482422, "step": 273} +{"info/global_step": 274, "train_info/time_within_train_step": 26.331672430038452, "step": 274} +{"train_info/time_between_train_steps": 0.005801677703857422, "step": 274} +{"info/global_step": 275, "train_info/time_within_train_step": 26.319733381271362, "step": 275} +{"train_info/time_between_train_steps": 0.0038471221923828125, "step": 275} +{"info/global_step": 276, "train_info/time_within_train_step": 26.28534245491028, "step": 276} +{"train_info/time_between_train_steps": 0.004002094268798828, "step": 276} +{"info/global_step": 277, "train_info/time_within_train_step": 26.316314935684204, "step": 277} +{"train_info/time_between_train_steps": 0.003908634185791016, "step": 277} +{"info/global_step": 278, "train_info/time_within_train_step": 26.36594843864441, "step": 278} +{"train_info/time_between_train_steps": 0.0039055347442626953, "step": 278} +{"info/global_step": 279, "train_info/time_within_train_step": 26.331374406814575, "step": 279} +{"train_info/time_between_train_steps": 0.0039031505584716797, "step": 279} +{"info/global_step": 280, "train_info/time_within_train_step": 26.29745650291443, "step": 280} +{"train_info/time_between_train_steps": 0.00397181510925293, "step": 280} +{"info/global_step": 281, "train_info/time_within_train_step": 26.328678607940674, "step": 281} +{"train_info/time_between_train_steps": 0.003843069076538086, "step": 281} +{"info/global_step": 282, "train_info/time_within_train_step": 26.30795693397522, "step": 282} +{"train_info/time_between_train_steps": 0.004031181335449219, "step": 282} +{"info/global_step": 283, "train_info/time_within_train_step": 27.04195547103882, "step": 283} +{"train_info/time_between_train_steps": 0.004242658615112305, "step": 283} +{"info/global_step": 284, "train_info/time_within_train_step": 26.867671489715576, "step": 284} +{"train_info/time_between_train_steps": 0.004196643829345703, "step": 284} +{"info/global_step": 285, "train_info/time_within_train_step": 26.36294937133789, "step": 285} +{"train_info/time_between_train_steps": 0.004159450531005859, "step": 285} +{"info/global_step": 286, "train_info/time_within_train_step": 27.29453420639038, "step": 286} +{"train_info/time_between_train_steps": 0.0049381256103515625, "step": 286} +{"info/global_step": 287, "train_info/time_within_train_step": 28.240972757339478, "step": 287} +{"train_info/time_between_train_steps": 0.004410266876220703, "step": 287} +{"info/global_step": 288, "train_info/time_within_train_step": 26.36314344406128, "step": 288} +{"train_info/time_between_train_steps": 0.004691600799560547, "step": 288} +{"train_info/time_between_train_steps": 16.556506395339966, "step": 288} +{"info/global_step": 289, "train_info/time_within_train_step": 26.30731511116028, "step": 289} +{"train_info/time_between_train_steps": 0.0036745071411132812, "step": 289} +{"info/global_step": 290, "train_info/time_within_train_step": 26.36835789680481, "step": 290} +{"train_info/time_between_train_steps": 0.0037186145782470703, "step": 290} +{"info/global_step": 291, "train_info/time_within_train_step": 26.264390230178833, "step": 291} +{"train_info/time_between_train_steps": 0.003674745559692383, "step": 291} +{"info/global_step": 292, "train_info/time_within_train_step": 26.407090663909912, "step": 292} +{"train_info/time_between_train_steps": 0.004229068756103516, "step": 292} +{"info/global_step": 293, "train_info/time_within_train_step": 26.391268968582153, "step": 293} +{"train_info/time_between_train_steps": 0.003848552703857422, "step": 293} +{"info/global_step": 294, "train_info/time_within_train_step": 26.391870260238647, "step": 294} +{"train_info/time_between_train_steps": 0.003993988037109375, "step": 294} +{"info/global_step": 295, "train_info/time_within_train_step": 26.287360191345215, "step": 295} +{"train_info/time_between_train_steps": 0.003947734832763672, "step": 295} +{"info/global_step": 296, "train_info/time_within_train_step": 26.39419460296631, "step": 296} +{"train_info/time_between_train_steps": 0.003874063491821289, "step": 296} +{"info/global_step": 297, "train_info/time_within_train_step": 26.28437614440918, "step": 297} +{"train_info/time_between_train_steps": 0.003846883773803711, "step": 297} +{"info/global_step": 298, "train_info/time_within_train_step": 26.36673593521118, "step": 298} +{"train_info/time_between_train_steps": 0.0038743019104003906, "step": 298} +{"info/global_step": 299, "train_info/time_within_train_step": 26.303540468215942, "step": 299} +{"train_info/time_between_train_steps": 0.003973722457885742, "step": 299} +{"info/global_step": 300, "train_info/time_within_train_step": 26.368932247161865, "step": 300} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18056.2958984375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740933659, "_runtime": 8165}, "step": 300} +{"logs": {"train/loss": 3.8436, "train/learning_rate": 0.0005, "train/epoch": 8.01, "_timestamp": 1740933659, "_runtime": 8165}, "step": 300} +{"train_info/time_between_train_steps": 51.44028973579407, "step": 300} +{"info/global_step": 301, "train_info/time_within_train_step": 26.401034116744995, "step": 301} +{"train_info/time_between_train_steps": 0.0038213729858398438, "step": 301} +{"info/global_step": 302, "train_info/time_within_train_step": 26.539058446884155, "step": 302} +{"train_info/time_between_train_steps": 0.003994941711425781, "step": 302} +{"info/global_step": 303, "train_info/time_within_train_step": 26.34849190711975, "step": 303} +{"train_info/time_between_train_steps": 0.003939390182495117, "step": 303} +{"info/global_step": 304, "train_info/time_within_train_step": 26.374764442443848, "step": 304} +{"train_info/time_between_train_steps": 0.003973484039306641, "step": 304} +{"info/global_step": 305, "train_info/time_within_train_step": 26.302910089492798, "step": 305} +{"train_info/time_between_train_steps": 0.013349294662475586, "step": 305} +{"info/global_step": 306, "train_info/time_within_train_step": 26.30878210067749, "step": 306} +{"train_info/time_between_train_steps": 0.0037796497344970703, "step": 306} +{"info/global_step": 307, "train_info/time_within_train_step": 26.29271960258484, "step": 307} +{"train_info/time_between_train_steps": 0.003643512725830078, "step": 307} +{"info/global_step": 308, "train_info/time_within_train_step": 26.31070041656494, "step": 308} +{"train_info/time_between_train_steps": 0.0036933422088623047, "step": 308} +{"info/global_step": 309, "train_info/time_within_train_step": 26.31777596473694, "step": 309} +{"train_info/time_between_train_steps": 0.003896474838256836, "step": 309} +{"info/global_step": 310, "train_info/time_within_train_step": 26.399232149124146, "step": 310} +{"train_info/time_between_train_steps": 0.0037183761596679688, "step": 310} +{"info/global_step": 311, "train_info/time_within_train_step": 26.302313566207886, "step": 311} +{"train_info/time_between_train_steps": 0.0037560462951660156, "step": 311} +{"info/global_step": 312, "train_info/time_within_train_step": 26.304062128067017, "step": 312} +{"train_info/time_between_train_steps": 0.0038399696350097656, "step": 312} +{"info/global_step": 313, "train_info/time_within_train_step": 26.30182123184204, "step": 313} +{"train_info/time_between_train_steps": 0.0037114620208740234, "step": 313} +{"info/global_step": 314, "train_info/time_within_train_step": 26.29504942893982, "step": 314} +{"train_info/time_between_train_steps": 0.003931522369384766, "step": 314} +{"info/global_step": 315, "train_info/time_within_train_step": 26.33365225791931, "step": 315} +{"train_info/time_between_train_steps": 0.0038225650787353516, "step": 315} +{"info/global_step": 316, "train_info/time_within_train_step": 26.31505823135376, "step": 316} +{"train_info/time_between_train_steps": 0.003705263137817383, "step": 316} +{"info/global_step": 317, "train_info/time_within_train_step": 26.29802441596985, "step": 317} +{"train_info/time_between_train_steps": 0.0037353038787841797, "step": 317} +{"info/global_step": 318, "train_info/time_within_train_step": 26.329500198364258, "step": 318} +{"train_info/time_between_train_steps": 0.003654956817626953, "step": 318} +{"info/global_step": 319, "train_info/time_within_train_step": 26.33216691017151, "step": 319} +{"train_info/time_between_train_steps": 0.00370025634765625, "step": 319} +{"info/global_step": 320, "train_info/time_within_train_step": 26.296109676361084, "step": 320} +{"train_info/time_between_train_steps": 0.003880023956298828, "step": 320} +{"info/global_step": 321, "train_info/time_within_train_step": 26.283617973327637, "step": 321} +{"train_info/time_between_train_steps": 0.004188060760498047, "step": 321} +{"info/global_step": 322, "train_info/time_within_train_step": 26.379514694213867, "step": 322} +{"train_info/time_between_train_steps": 0.0040264129638671875, "step": 322} +{"info/global_step": 323, "train_info/time_within_train_step": 26.29429316520691, "step": 323} +{"train_info/time_between_train_steps": 0.00402522087097168, "step": 323} +{"info/global_step": 324, "train_info/time_within_train_step": 26.480103015899658, "step": 324} +{"train_info/time_between_train_steps": 0.0043277740478515625, "step": 324} +{"train_info/time_between_train_steps": 16.510496139526367, "step": 324} +{"info/global_step": 325, "train_info/time_within_train_step": 26.31899642944336, "step": 325} +{"train_info/time_between_train_steps": 0.0036818981170654297, "step": 325} +{"info/global_step": 326, "train_info/time_within_train_step": 26.39208483695984, "step": 326} +{"train_info/time_between_train_steps": 0.003759145736694336, "step": 326} +{"info/global_step": 327, "train_info/time_within_train_step": 26.30473828315735, "step": 327} +{"train_info/time_between_train_steps": 0.003817319869995117, "step": 327} +{"info/global_step": 328, "train_info/time_within_train_step": 26.39277744293213, "step": 328} +{"train_info/time_between_train_steps": 0.004027605056762695, "step": 328} +{"info/global_step": 329, "train_info/time_within_train_step": 26.294517517089844, "step": 329} +{"train_info/time_between_train_steps": 0.004118442535400391, "step": 329} +{"info/global_step": 330, "train_info/time_within_train_step": 26.417054891586304, "step": 330} +{"train_info/time_between_train_steps": 0.004052162170410156, "step": 330} +{"info/global_step": 331, "train_info/time_within_train_step": 26.298009157180786, "step": 331} +{"train_info/time_between_train_steps": 0.003978729248046875, "step": 331} +{"info/global_step": 332, "train_info/time_within_train_step": 26.43013644218445, "step": 332} +{"train_info/time_between_train_steps": 0.003972053527832031, "step": 332} +{"info/global_step": 333, "train_info/time_within_train_step": 26.3045973777771, "step": 333} +{"train_info/time_between_train_steps": 0.003885030746459961, "step": 333} +{"info/global_step": 334, "train_info/time_within_train_step": 26.424028396606445, "step": 334} +{"train_info/time_between_train_steps": 0.004055500030517578, "step": 334} +{"info/global_step": 335, "train_info/time_within_train_step": 26.306971311569214, "step": 335} +{"train_info/time_between_train_steps": 0.003969669342041016, "step": 335} +{"info/global_step": 336, "train_info/time_within_train_step": 26.419546604156494, "step": 336} +{"train_info/time_between_train_steps": 0.003989696502685547, "step": 336} +{"info/global_step": 337, "train_info/time_within_train_step": 26.312731504440308, "step": 337} +{"train_info/time_between_train_steps": 0.004022836685180664, "step": 337} +{"info/global_step": 338, "train_info/time_within_train_step": 26.36618661880493, "step": 338} +{"train_info/time_between_train_steps": 0.0039215087890625, "step": 338} +{"info/global_step": 339, "train_info/time_within_train_step": 26.379167318344116, "step": 339} +{"train_info/time_between_train_steps": 0.0038843154907226562, "step": 339} +{"info/global_step": 340, "train_info/time_within_train_step": 26.40154790878296, "step": 340} +{"train_info/time_between_train_steps": 0.0040051937103271484, "step": 340} +{"info/global_step": 341, "train_info/time_within_train_step": 26.31598663330078, "step": 341} +{"train_info/time_between_train_steps": 0.014596223831176758, "step": 341} +{"info/global_step": 342, "train_info/time_within_train_step": 26.295308113098145, "step": 342} +{"train_info/time_between_train_steps": 0.003818988800048828, "step": 342} +{"info/global_step": 343, "train_info/time_within_train_step": 26.304914236068726, "step": 343} +{"train_info/time_between_train_steps": 0.0038843154907226562, "step": 343} +{"info/global_step": 344, "train_info/time_within_train_step": 26.314478874206543, "step": 344} +{"train_info/time_between_train_steps": 0.003779888153076172, "step": 344} +{"info/global_step": 345, "train_info/time_within_train_step": 26.327203273773193, "step": 345} +{"train_info/time_between_train_steps": 0.003805398941040039, "step": 345} +{"info/global_step": 346, "train_info/time_within_train_step": 26.274905920028687, "step": 346} +{"train_info/time_between_train_steps": 0.0038993358612060547, "step": 346} +{"info/global_step": 347, "train_info/time_within_train_step": 26.297030448913574, "step": 347} +{"train_info/time_between_train_steps": 0.003810882568359375, "step": 347} +{"info/global_step": 348, "train_info/time_within_train_step": 26.316637754440308, "step": 348} +{"train_info/time_between_train_steps": 0.0038251876831054688, "step": 348} +{"info/global_step": 349, "train_info/time_within_train_step": 26.318049430847168, "step": 349} +{"train_info/time_between_train_steps": 0.0038886070251464844, "step": 349} +{"info/global_step": 350, "train_info/time_within_train_step": 26.306476354599, "step": 350} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18056.2958984375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740935045, "_runtime": 9551}, "step": 350} +{"logs": {"train/loss": 3.625, "train/learning_rate": 0.00047222222222222224, "train/epoch": 9.02, "_timestamp": 1740935045, "_runtime": 9551}, "step": 350} +{"train_info/time_between_train_steps": 0.04396176338195801, "step": 350} +{"info/global_step": 351, "train_info/time_within_train_step": 26.305142164230347, "step": 351} +{"train_info/time_between_train_steps": 0.003860950469970703, "step": 351} +{"info/global_step": 352, "train_info/time_within_train_step": 26.305799961090088, "step": 352} +{"train_info/time_between_train_steps": 0.003805398941040039, "step": 352} +{"info/global_step": 353, "train_info/time_within_train_step": 26.2933189868927, "step": 353} +{"train_info/time_between_train_steps": 0.003924846649169922, "step": 353} +{"info/global_step": 354, "train_info/time_within_train_step": 26.262192249298096, "step": 354} +{"train_info/time_between_train_steps": 0.003964900970458984, "step": 354} +{"info/global_step": 355, "train_info/time_within_train_step": 26.36498260498047, "step": 355} +{"train_info/time_between_train_steps": 0.003869295120239258, "step": 355} +{"info/global_step": 356, "train_info/time_within_train_step": 26.28571605682373, "step": 356} +{"train_info/time_between_train_steps": 0.003964424133300781, "step": 356} +{"info/global_step": 357, "train_info/time_within_train_step": 26.3078670501709, "step": 357} +{"train_info/time_between_train_steps": 0.004014492034912109, "step": 357} +{"info/global_step": 358, "train_info/time_within_train_step": 26.308706521987915, "step": 358} +{"train_info/time_between_train_steps": 0.004308462142944336, "step": 358} +{"info/global_step": 359, "train_info/time_within_train_step": 26.313162088394165, "step": 359} +{"train_info/time_between_train_steps": 0.004519939422607422, "step": 359} +{"info/global_step": 360, "train_info/time_within_train_step": 26.342973470687866, "step": 360} +{"train_info/time_between_train_steps": 0.004849672317504883, "step": 360} +{"train_info/time_between_train_steps": 16.32834792137146, "step": 360} +{"info/global_step": 361, "train_info/time_within_train_step": 26.2917697429657, "step": 361} +{"train_info/time_between_train_steps": 0.003844022750854492, "step": 361} +{"info/global_step": 362, "train_info/time_within_train_step": 26.353420734405518, "step": 362} +{"train_info/time_between_train_steps": 0.003718852996826172, "step": 362} +{"info/global_step": 363, "train_info/time_within_train_step": 26.289231061935425, "step": 363} +{"train_info/time_between_train_steps": 0.003768444061279297, "step": 363} +{"info/global_step": 364, "train_info/time_within_train_step": 26.39735221862793, "step": 364} +{"train_info/time_between_train_steps": 0.003996849060058594, "step": 364} +{"info/global_step": 365, "train_info/time_within_train_step": 26.30676770210266, "step": 365} +{"train_info/time_between_train_steps": 0.0040013790130615234, "step": 365} +{"info/global_step": 366, "train_info/time_within_train_step": 26.38082218170166, "step": 366} +{"train_info/time_between_train_steps": 0.004102230072021484, "step": 366} +{"info/global_step": 367, "train_info/time_within_train_step": 26.279165506362915, "step": 367} +{"train_info/time_between_train_steps": 0.004015207290649414, "step": 367} +{"info/global_step": 368, "train_info/time_within_train_step": 26.398298263549805, "step": 368} +{"train_info/time_between_train_steps": 0.0039055347442626953, "step": 368} +{"info/global_step": 369, "train_info/time_within_train_step": 26.30722737312317, "step": 369} +{"train_info/time_between_train_steps": 0.004094839096069336, "step": 369} +{"info/global_step": 370, "train_info/time_within_train_step": 26.49166250228882, "step": 370} +{"train_info/time_between_train_steps": 0.003999948501586914, "step": 370} +{"info/global_step": 371, "train_info/time_within_train_step": 26.321455240249634, "step": 371} +{"train_info/time_between_train_steps": 0.004044294357299805, "step": 371} +{"info/global_step": 372, "train_info/time_within_train_step": 26.404380559921265, "step": 372} +{"train_info/time_between_train_steps": 0.0040857791900634766, "step": 372} +{"info/global_step": 373, "train_info/time_within_train_step": 26.30823063850403, "step": 373} +{"train_info/time_between_train_steps": 0.004114627838134766, "step": 373} +{"info/global_step": 374, "train_info/time_within_train_step": 26.408252000808716, "step": 374} +{"train_info/time_between_train_steps": 0.0040264129638671875, "step": 374} +{"info/global_step": 375, "train_info/time_within_train_step": 26.312886476516724, "step": 375} +{"train_info/time_between_train_steps": 0.003999948501586914, "step": 375} +{"info/global_step": 376, "train_info/time_within_train_step": 26.388986349105835, "step": 376} +{"train_info/time_between_train_steps": 0.004191875457763672, "step": 376} +{"info/global_step": 377, "train_info/time_within_train_step": 26.312442779541016, "step": 377} +{"train_info/time_between_train_steps": 0.014828681945800781, "step": 377} +{"info/global_step": 378, "train_info/time_within_train_step": 26.29264497756958, "step": 378} +{"train_info/time_between_train_steps": 0.0037298202514648438, "step": 378} +{"info/global_step": 379, "train_info/time_within_train_step": 26.310399055480957, "step": 379} +{"train_info/time_between_train_steps": 0.003851652145385742, "step": 379} +{"info/global_step": 380, "train_info/time_within_train_step": 26.30270743370056, "step": 380} +{"train_info/time_between_train_steps": 0.003907442092895508, "step": 380} +{"info/global_step": 381, "train_info/time_within_train_step": 26.30629849433899, "step": 381} +{"train_info/time_between_train_steps": 0.003896474838256836, "step": 381} +{"info/global_step": 382, "train_info/time_within_train_step": 26.305156707763672, "step": 382} +{"train_info/time_between_train_steps": 0.003802776336669922, "step": 382} +{"info/global_step": 383, "train_info/time_within_train_step": 26.277920484542847, "step": 383} +{"train_info/time_between_train_steps": 0.0038721561431884766, "step": 383} +{"info/global_step": 384, "train_info/time_within_train_step": 26.3227756023407, "step": 384} +{"train_info/time_between_train_steps": 0.003979206085205078, "step": 384} +{"info/global_step": 385, "train_info/time_within_train_step": 26.3023784160614, "step": 385} +{"train_info/time_between_train_steps": 0.0038840770721435547, "step": 385} +{"info/global_step": 386, "train_info/time_within_train_step": 26.373297452926636, "step": 386} +{"train_info/time_between_train_steps": 0.003826141357421875, "step": 386} +{"info/global_step": 387, "train_info/time_within_train_step": 26.300121068954468, "step": 387} +{"train_info/time_between_train_steps": 0.0038564205169677734, "step": 387} +{"info/global_step": 388, "train_info/time_within_train_step": 26.313684463500977, "step": 388} +{"train_info/time_between_train_steps": 0.003858804702758789, "step": 388} +{"info/global_step": 389, "train_info/time_within_train_step": 26.308807134628296, "step": 389} +{"train_info/time_between_train_steps": 0.0040361881256103516, "step": 389} +{"info/global_step": 390, "train_info/time_within_train_step": 26.31744384765625, "step": 390} +{"train_info/time_between_train_steps": 0.0038330554962158203, "step": 390} +{"info/global_step": 391, "train_info/time_within_train_step": 26.283962726593018, "step": 391} +{"train_info/time_between_train_steps": 0.0038461685180664062, "step": 391} +{"info/global_step": 392, "train_info/time_within_train_step": 26.30365824699402, "step": 392} +{"train_info/time_between_train_steps": 0.003985404968261719, "step": 392} +{"info/global_step": 393, "train_info/time_within_train_step": 26.32194232940674, "step": 393} +{"train_info/time_between_train_steps": 0.00407719612121582, "step": 393} +{"info/global_step": 394, "train_info/time_within_train_step": 26.325550079345703, "step": 394} +{"train_info/time_between_train_steps": 0.004225492477416992, "step": 394} +{"info/global_step": 395, "train_info/time_within_train_step": 26.33533501625061, "step": 395} +{"train_info/time_between_train_steps": 0.004317760467529297, "step": 395} +{"info/global_step": 396, "train_info/time_within_train_step": 26.354658842086792, "step": 396} +{"train_info/time_between_train_steps": 0.00475311279296875, "step": 396} +{"train_info/time_between_train_steps": 16.48871874809265, "step": 396} +{"info/global_step": 397, "train_info/time_within_train_step": 26.33174157142639, "step": 397} +{"train_info/time_between_train_steps": 0.003654003143310547, "step": 397} +{"info/global_step": 398, "train_info/time_within_train_step": 26.39084482192993, "step": 398} +{"train_info/time_between_train_steps": 0.003664255142211914, "step": 398} +{"info/global_step": 399, "train_info/time_within_train_step": 26.33787989616394, "step": 399} +{"train_info/time_between_train_steps": 0.003979921340942383, "step": 399} +{"info/global_step": 400, "train_info/time_within_train_step": 26.444265604019165, "step": 400} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18056.2958984375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740936396, "_runtime": 10902}, "step": 400} +{"logs": {"train/loss": 3.5012, "train/learning_rate": 0.00044444444444444436, "train/epoch": 11.0, "_timestamp": 1740936396, "_runtime": 10902}, "step": 400} +{"train_info/time_between_train_steps": 30.186365127563477, "step": 400} +{"info/global_step": 401, "train_info/time_within_train_step": 26.49423050880432, "step": 401} +{"train_info/time_between_train_steps": 0.003958702087402344, "step": 401} +{"info/global_step": 402, "train_info/time_within_train_step": 26.443530321121216, "step": 402} +{"train_info/time_between_train_steps": 0.0039784908294677734, "step": 402} +{"info/global_step": 403, "train_info/time_within_train_step": 26.349140644073486, "step": 403} +{"train_info/time_between_train_steps": 0.003984212875366211, "step": 403} +{"info/global_step": 404, "train_info/time_within_train_step": 26.37553119659424, "step": 404} +{"train_info/time_between_train_steps": 0.003925323486328125, "step": 404} +{"info/global_step": 405, "train_info/time_within_train_step": 26.310165405273438, "step": 405} +{"train_info/time_between_train_steps": 0.003859281539916992, "step": 405} +{"info/global_step": 406, "train_info/time_within_train_step": 26.38314962387085, "step": 406} +{"train_info/time_between_train_steps": 0.003797769546508789, "step": 406} +{"info/global_step": 407, "train_info/time_within_train_step": 26.303179502487183, "step": 407} +{"train_info/time_between_train_steps": 0.004059791564941406, "step": 407} +{"info/global_step": 408, "train_info/time_within_train_step": 26.39940309524536, "step": 408} +{"train_info/time_between_train_steps": 0.004036903381347656, "step": 408} +{"info/global_step": 409, "train_info/time_within_train_step": 26.27697515487671, "step": 409} +{"train_info/time_between_train_steps": 0.0039958953857421875, "step": 409} +{"info/global_step": 410, "train_info/time_within_train_step": 26.429683208465576, "step": 410} +{"train_info/time_between_train_steps": 0.003874063491821289, "step": 410} +{"info/global_step": 411, "train_info/time_within_train_step": 26.292856454849243, "step": 411} +{"train_info/time_between_train_steps": 0.0039014816284179688, "step": 411} +{"info/global_step": 412, "train_info/time_within_train_step": 26.490278244018555, "step": 412} +{"train_info/time_between_train_steps": 0.004014730453491211, "step": 412} +{"info/global_step": 413, "train_info/time_within_train_step": 26.305298805236816, "step": 413} +{"train_info/time_between_train_steps": 0.014767646789550781, "step": 413} +{"info/global_step": 414, "train_info/time_within_train_step": 26.31042766571045, "step": 414} +{"train_info/time_between_train_steps": 0.003632068634033203, "step": 414} +{"info/global_step": 415, "train_info/time_within_train_step": 26.356624364852905, "step": 415} +{"train_info/time_between_train_steps": 0.0038366317749023438, "step": 415} +{"info/global_step": 416, "train_info/time_within_train_step": 26.38478374481201, "step": 416} +{"train_info/time_between_train_steps": 0.0038323402404785156, "step": 416} +{"info/global_step": 417, "train_info/time_within_train_step": 26.31295919418335, "step": 417} +{"train_info/time_between_train_steps": 0.003876924514770508, "step": 417} +{"info/global_step": 418, "train_info/time_within_train_step": 26.27362823486328, "step": 418} +{"train_info/time_between_train_steps": 0.0039844512939453125, "step": 418} +{"info/global_step": 419, "train_info/time_within_train_step": 27.974694967269897, "step": 419} +{"train_info/time_between_train_steps": 0.004007816314697266, "step": 419} +{"info/global_step": 420, "train_info/time_within_train_step": 26.509746074676514, "step": 420} +{"train_info/time_between_train_steps": 0.0039250850677490234, "step": 420} +{"info/global_step": 421, "train_info/time_within_train_step": 26.317464351654053, "step": 421} +{"train_info/time_between_train_steps": 0.003968238830566406, "step": 421} +{"info/global_step": 422, "train_info/time_within_train_step": 26.311166048049927, "step": 422} +{"train_info/time_between_train_steps": 0.0038368701934814453, "step": 422} +{"info/global_step": 423, "train_info/time_within_train_step": 26.31086039543152, "step": 423} +{"train_info/time_between_train_steps": 0.003891468048095703, "step": 423} +{"info/global_step": 424, "train_info/time_within_train_step": 26.31455111503601, "step": 424} +{"train_info/time_between_train_steps": 0.003836393356323242, "step": 424} +{"info/global_step": 425, "train_info/time_within_train_step": 26.315263748168945, "step": 425} +{"train_info/time_between_train_steps": 0.00391077995300293, "step": 425} +{"info/global_step": 426, "train_info/time_within_train_step": 26.319511890411377, "step": 426} +{"train_info/time_between_train_steps": 0.0039021968841552734, "step": 426} +{"info/global_step": 427, "train_info/time_within_train_step": 26.278562307357788, "step": 427} +{"train_info/time_between_train_steps": 0.003827333450317383, "step": 427} +{"info/global_step": 428, "train_info/time_within_train_step": 26.348344564437866, "step": 428} +{"train_info/time_between_train_steps": 0.003854036331176758, "step": 428} +{"info/global_step": 429, "train_info/time_within_train_step": 26.281343936920166, "step": 429} +{"train_info/time_between_train_steps": 0.003950834274291992, "step": 429} +{"info/global_step": 430, "train_info/time_within_train_step": 26.31960892677307, "step": 430} +{"train_info/time_between_train_steps": 0.0043354034423828125, "step": 430} +{"info/global_step": 431, "train_info/time_within_train_step": 26.316428899765015, "step": 431} +{"train_info/time_between_train_steps": 0.004391908645629883, "step": 431} +{"info/global_step": 432, "train_info/time_within_train_step": 26.412007331848145, "step": 432} +{"train_info/time_between_train_steps": 0.004822492599487305, "step": 432} +{"train_info/time_between_train_steps": 16.586557626724243, "step": 432} +{"info/global_step": 433, "train_info/time_within_train_step": 26.2563579082489, "step": 433} +{"train_info/time_between_train_steps": 0.0037686824798583984, "step": 433} +{"info/global_step": 434, "train_info/time_within_train_step": 26.36132526397705, "step": 434} +{"train_info/time_between_train_steps": 0.003731966018676758, "step": 434} +{"info/global_step": 435, "train_info/time_within_train_step": 26.260727882385254, "step": 435} +{"train_info/time_between_train_steps": 0.0036802291870117188, "step": 435} +{"info/global_step": 436, "train_info/time_within_train_step": 26.418398141860962, "step": 436} +{"train_info/time_between_train_steps": 0.0038552284240722656, "step": 436} +{"info/global_step": 437, "train_info/time_within_train_step": 26.284741640090942, "step": 437} +{"train_info/time_between_train_steps": 0.0038979053497314453, "step": 437} +{"info/global_step": 438, "train_info/time_within_train_step": 26.347891092300415, "step": 438} +{"train_info/time_between_train_steps": 0.003953695297241211, "step": 438} +{"info/global_step": 439, "train_info/time_within_train_step": 26.313118934631348, "step": 439} +{"train_info/time_between_train_steps": 0.003962993621826172, "step": 439} +{"info/global_step": 440, "train_info/time_within_train_step": 26.34752368927002, "step": 440} +{"train_info/time_between_train_steps": 0.003888368606567383, "step": 440} +{"info/global_step": 441, "train_info/time_within_train_step": 26.298152446746826, "step": 441} +{"train_info/time_between_train_steps": 0.004013538360595703, "step": 441} +{"info/global_step": 442, "train_info/time_within_train_step": 26.42555260658264, "step": 442} +{"train_info/time_between_train_steps": 0.003981351852416992, "step": 442} +{"info/global_step": 443, "train_info/time_within_train_step": 26.334256410598755, "step": 443} +{"train_info/time_between_train_steps": 0.004082202911376953, "step": 443} +{"info/global_step": 444, "train_info/time_within_train_step": 26.361008167266846, "step": 444} +{"train_info/time_between_train_steps": 0.0039708614349365234, "step": 444} +{"info/global_step": 445, "train_info/time_within_train_step": 26.296332120895386, "step": 445} +{"train_info/time_between_train_steps": 0.004012107849121094, "step": 445} +{"info/global_step": 446, "train_info/time_within_train_step": 26.40019679069519, "step": 446} +{"train_info/time_between_train_steps": 0.003961324691772461, "step": 446} +{"info/global_step": 447, "train_info/time_within_train_step": 26.36438536643982, "step": 447} +{"train_info/time_between_train_steps": 0.003970623016357422, "step": 447} +{"info/global_step": 448, "train_info/time_within_train_step": 26.375534772872925, "step": 448} +{"train_info/time_between_train_steps": 0.004055500030517578, "step": 448} +{"info/global_step": 449, "train_info/time_within_train_step": 26.277848720550537, "step": 449} +{"train_info/time_between_train_steps": 0.021892547607421875, "step": 449} +{"info/global_step": 450, "train_info/time_within_train_step": 26.29235577583313, "step": 450} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18056.2958984375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740937765, "_runtime": 12271}, "step": 450} +{"logs": {"train/loss": 3.3399, "train/learning_rate": 0.00041666666666666664, "train/epoch": 12.02, "_timestamp": 1740937765, "_runtime": 12271}, "step": 450} +{"train_info/time_between_train_steps": 0.043674468994140625, "step": 450} +{"info/global_step": 451, "train_info/time_within_train_step": 26.274463415145874, "step": 451} +{"train_info/time_between_train_steps": 0.0037882328033447266, "step": 451} +{"info/global_step": 452, "train_info/time_within_train_step": 26.293376922607422, "step": 452} +{"train_info/time_between_train_steps": 0.004121065139770508, "step": 452} +{"info/global_step": 453, "train_info/time_within_train_step": 26.276193618774414, "step": 453} +{"train_info/time_between_train_steps": 0.0039598941802978516, "step": 453} +{"info/global_step": 454, "train_info/time_within_train_step": 26.25560474395752, "step": 454} +{"train_info/time_between_train_steps": 0.0038061141967773438, "step": 454} +{"info/global_step": 455, "train_info/time_within_train_step": 26.269553184509277, "step": 455} +{"train_info/time_between_train_steps": 0.0038275718688964844, "step": 455} +{"info/global_step": 456, "train_info/time_within_train_step": 26.272834300994873, "step": 456} +{"train_info/time_between_train_steps": 0.004058837890625, "step": 456} +{"info/global_step": 457, "train_info/time_within_train_step": 26.26793384552002, "step": 457} +{"train_info/time_between_train_steps": 0.003925800323486328, "step": 457} +{"info/global_step": 458, "train_info/time_within_train_step": 26.2747905254364, "step": 458} +{"train_info/time_between_train_steps": 0.0038552284240722656, "step": 458} +{"info/global_step": 459, "train_info/time_within_train_step": 26.285505771636963, "step": 459} +{"train_info/time_between_train_steps": 0.0038840770721435547, "step": 459} +{"info/global_step": 460, "train_info/time_within_train_step": 26.302300453186035, "step": 460} +{"train_info/time_between_train_steps": 0.0039517879486083984, "step": 460} +{"info/global_step": 461, "train_info/time_within_train_step": 26.317097902297974, "step": 461} +{"train_info/time_between_train_steps": 0.003954172134399414, "step": 461} +{"info/global_step": 462, "train_info/time_within_train_step": 26.27529263496399, "step": 462} +{"train_info/time_between_train_steps": 0.003924131393432617, "step": 462} +{"info/global_step": 463, "train_info/time_within_train_step": 26.40679383277893, "step": 463} +{"train_info/time_between_train_steps": 0.00396728515625, "step": 463} +{"info/global_step": 464, "train_info/time_within_train_step": 26.2663893699646, "step": 464} +{"train_info/time_between_train_steps": 0.003890514373779297, "step": 464} +{"info/global_step": 465, "train_info/time_within_train_step": 26.36448097229004, "step": 465} +{"train_info/time_between_train_steps": 0.00402069091796875, "step": 465} +{"info/global_step": 466, "train_info/time_within_train_step": 26.303375482559204, "step": 466} +{"train_info/time_between_train_steps": 0.004400730133056641, "step": 466} +{"info/global_step": 467, "train_info/time_within_train_step": 26.307247638702393, "step": 467} +{"train_info/time_between_train_steps": 0.004953622817993164, "step": 467} +{"info/global_step": 468, "train_info/time_within_train_step": 26.357328414916992, "step": 468} +{"train_info/time_between_train_steps": 0.0063893795013427734, "step": 468} +{"train_info/time_between_train_steps": 16.733958959579468, "step": 468} +{"info/global_step": 469, "train_info/time_within_train_step": 26.29728126525879, "step": 469} +{"train_info/time_between_train_steps": 0.0037517547607421875, "step": 469} +{"info/global_step": 470, "train_info/time_within_train_step": 26.35706639289856, "step": 470} +{"train_info/time_between_train_steps": 0.005466461181640625, "step": 470} +{"info/global_step": 471, "train_info/time_within_train_step": 26.3620822429657, "step": 471} +{"train_info/time_between_train_steps": 0.005603790283203125, "step": 471} +{"info/global_step": 472, "train_info/time_within_train_step": 26.58652639389038, "step": 472} +{"train_info/time_between_train_steps": 0.005759239196777344, "step": 472} +{"info/global_step": 473, "train_info/time_within_train_step": 26.268330812454224, "step": 473} +{"train_info/time_between_train_steps": 0.005881786346435547, "step": 473} +{"info/global_step": 474, "train_info/time_within_train_step": 26.379566431045532, "step": 474} +{"train_info/time_between_train_steps": 0.0040264129638671875, "step": 474} +{"info/global_step": 475, "train_info/time_within_train_step": 26.261463165283203, "step": 475} +{"train_info/time_between_train_steps": 0.004103660583496094, "step": 475} +{"info/global_step": 476, "train_info/time_within_train_step": 26.402122735977173, "step": 476} +{"train_info/time_between_train_steps": 0.004013776779174805, "step": 476} +{"info/global_step": 477, "train_info/time_within_train_step": 26.274023056030273, "step": 477} +{"train_info/time_between_train_steps": 0.00413966178894043, "step": 477} +{"info/global_step": 478, "train_info/time_within_train_step": 26.416663885116577, "step": 478} +{"train_info/time_between_train_steps": 0.003852367401123047, "step": 478} +{"info/global_step": 479, "train_info/time_within_train_step": 26.273401498794556, "step": 479} +{"train_info/time_between_train_steps": 0.004128932952880859, "step": 479} +{"info/global_step": 480, "train_info/time_within_train_step": 26.388001203536987, "step": 480} +{"train_info/time_between_train_steps": 0.00421452522277832, "step": 480} +{"info/global_step": 481, "train_info/time_within_train_step": 26.27571439743042, "step": 481} +{"train_info/time_between_train_steps": 0.004052877426147461, "step": 481} +{"info/global_step": 482, "train_info/time_within_train_step": 26.360706329345703, "step": 482} +{"train_info/time_between_train_steps": 0.0040247440338134766, "step": 482} +{"info/global_step": 483, "train_info/time_within_train_step": 26.308196783065796, "step": 483} +{"train_info/time_between_train_steps": 0.0041158199310302734, "step": 483} +{"info/global_step": 484, "train_info/time_within_train_step": 26.355737924575806, "step": 484} +{"train_info/time_between_train_steps": 0.004050016403198242, "step": 484} +{"info/global_step": 485, "train_info/time_within_train_step": 26.292778730392456, "step": 485} +{"train_info/time_between_train_steps": 0.014600753784179688, "step": 485} +{"info/global_step": 486, "train_info/time_within_train_step": 26.25828981399536, "step": 486} +{"train_info/time_between_train_steps": 0.0038170814514160156, "step": 486} +{"info/global_step": 487, "train_info/time_within_train_step": 26.29239535331726, "step": 487} +{"train_info/time_between_train_steps": 0.00394129753112793, "step": 487} +{"info/global_step": 488, "train_info/time_within_train_step": 26.32222890853882, "step": 488} +{"train_info/time_between_train_steps": 0.0038650035858154297, "step": 488} +{"info/global_step": 489, "train_info/time_within_train_step": 26.303547382354736, "step": 489} +{"train_info/time_between_train_steps": 0.004045963287353516, "step": 489} +{"info/global_step": 490, "train_info/time_within_train_step": 26.31853675842285, "step": 490} +{"train_info/time_between_train_steps": 0.0038971900939941406, "step": 490} +{"info/global_step": 491, "train_info/time_within_train_step": 26.26459264755249, "step": 491} +{"train_info/time_between_train_steps": 0.00395512580871582, "step": 491} +{"info/global_step": 492, "train_info/time_within_train_step": 26.320712089538574, "step": 492} +{"train_info/time_between_train_steps": 0.0038559436798095703, "step": 492} +{"info/global_step": 493, "train_info/time_within_train_step": 26.322659254074097, "step": 493} +{"train_info/time_between_train_steps": 0.003929853439331055, "step": 493} +{"info/global_step": 494, "train_info/time_within_train_step": 26.260823249816895, "step": 494} +{"train_info/time_between_train_steps": 0.003985166549682617, "step": 494} +{"info/global_step": 495, "train_info/time_within_train_step": 26.26031231880188, "step": 495} +{"train_info/time_between_train_steps": 0.0038597583770751953, "step": 495} +{"info/global_step": 496, "train_info/time_within_train_step": 26.258259534835815, "step": 496} +{"train_info/time_between_train_steps": 0.003871440887451172, "step": 496} +{"info/global_step": 497, "train_info/time_within_train_step": 26.298217296600342, "step": 497} +{"train_info/time_between_train_steps": 0.0038962364196777344, "step": 497} +{"info/global_step": 498, "train_info/time_within_train_step": 26.263651132583618, "step": 498} +{"train_info/time_between_train_steps": 0.003928422927856445, "step": 498} +{"info/global_step": 499, "train_info/time_within_train_step": 26.313246250152588, "step": 499} +{"train_info/time_between_train_steps": 0.0038580894470214844, "step": 499} +{"info/global_step": 500, "train_info/time_within_train_step": 26.263288259506226, "step": 500} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18056.2958984375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740939099, "_runtime": 13605}, "step": 500} +{"logs": {"train/loss": 3.2438, "train/learning_rate": 0.00038888888888888887, "train/epoch": 13.03, "_timestamp": 1740939099, "_runtime": 13605}, "step": 500} +{"train_info/time_between_train_steps": 31.485732316970825, "step": 500} +{"info/global_step": 501, "train_info/time_within_train_step": 26.40240740776062, "step": 501} +{"train_info/time_between_train_steps": 0.004248857498168945, "step": 501} +{"info/global_step": 502, "train_info/time_within_train_step": 26.388749361038208, "step": 502} +{"train_info/time_between_train_steps": 0.004321575164794922, "step": 502} +{"info/global_step": 503, "train_info/time_within_train_step": 26.310784816741943, "step": 503} +{"train_info/time_between_train_steps": 0.004864931106567383, "step": 503} +{"info/global_step": 504, "train_info/time_within_train_step": 26.33556365966797, "step": 504} +{"train_info/time_between_train_steps": 0.005018472671508789, "step": 504} +{"train_info/time_between_train_steps": 16.399948596954346, "step": 504} +{"info/global_step": 505, "train_info/time_within_train_step": 26.25981903076172, "step": 505} +{"train_info/time_between_train_steps": 0.003734111785888672, "step": 505} +{"info/global_step": 506, "train_info/time_within_train_step": 26.365266799926758, "step": 506} +{"train_info/time_between_train_steps": 0.0037882328033447266, "step": 506} +{"info/global_step": 507, "train_info/time_within_train_step": 26.30067729949951, "step": 507} +{"train_info/time_between_train_steps": 0.0036513805389404297, "step": 507} +{"info/global_step": 508, "train_info/time_within_train_step": 27.126553773880005, "step": 508} +{"train_info/time_between_train_steps": 0.004116535186767578, "step": 508} +{"info/global_step": 509, "train_info/time_within_train_step": 27.05583691596985, "step": 509} +{"train_info/time_between_train_steps": 0.0039653778076171875, "step": 509} +{"info/global_step": 510, "train_info/time_within_train_step": 26.378639221191406, "step": 510} +{"train_info/time_between_train_steps": 0.003934144973754883, "step": 510} +{"info/global_step": 511, "train_info/time_within_train_step": 26.293986558914185, "step": 511} +{"train_info/time_between_train_steps": 0.0039060115814208984, "step": 511} +{"info/global_step": 512, "train_info/time_within_train_step": 26.396429300308228, "step": 512} +{"train_info/time_between_train_steps": 0.004059553146362305, "step": 512} +{"info/global_step": 513, "train_info/time_within_train_step": 26.28357744216919, "step": 513} +{"train_info/time_between_train_steps": 0.004008054733276367, "step": 513} +{"info/global_step": 514, "train_info/time_within_train_step": 26.359660863876343, "step": 514} +{"train_info/time_between_train_steps": 0.004030704498291016, "step": 514} +{"info/global_step": 515, "train_info/time_within_train_step": 26.28844118118286, "step": 515} +{"train_info/time_between_train_steps": 0.004119873046875, "step": 515} +{"info/global_step": 516, "train_info/time_within_train_step": 26.499816417694092, "step": 516} +{"train_info/time_between_train_steps": 0.003950595855712891, "step": 516} +{"info/global_step": 517, "train_info/time_within_train_step": 26.309903144836426, "step": 517} +{"train_info/time_between_train_steps": 0.003893613815307617, "step": 517} +{"info/global_step": 518, "train_info/time_within_train_step": 26.367167234420776, "step": 518} +{"train_info/time_between_train_steps": 0.003939151763916016, "step": 518} +{"info/global_step": 519, "train_info/time_within_train_step": 26.32042670249939, "step": 519} +{"train_info/time_between_train_steps": 0.0038652420043945312, "step": 519} +{"info/global_step": 520, "train_info/time_within_train_step": 26.352087259292603, "step": 520} +{"train_info/time_between_train_steps": 0.004050254821777344, "step": 520} +{"info/global_step": 521, "train_info/time_within_train_step": 26.28604006767273, "step": 521} +{"train_info/time_between_train_steps": 0.015031099319458008, "step": 521} +{"info/global_step": 522, "train_info/time_within_train_step": 26.313687801361084, "step": 522} +{"train_info/time_between_train_steps": 0.003823518753051758, "step": 522} +{"info/global_step": 523, "train_info/time_within_train_step": 26.319024085998535, "step": 523} +{"train_info/time_between_train_steps": 0.0038216114044189453, "step": 523} +{"info/global_step": 524, "train_info/time_within_train_step": 26.339608430862427, "step": 524} +{"train_info/time_between_train_steps": 0.003880739212036133, "step": 524} +{"info/global_step": 525, "train_info/time_within_train_step": 26.280024528503418, "step": 525} +{"train_info/time_between_train_steps": 0.003802776336669922, "step": 525} +{"info/global_step": 526, "train_info/time_within_train_step": 26.289682865142822, "step": 526} +{"train_info/time_between_train_steps": 0.0038132667541503906, "step": 526} +{"info/global_step": 527, "train_info/time_within_train_step": 26.296444416046143, "step": 527} +{"train_info/time_between_train_steps": 0.003918886184692383, "step": 527} +{"info/global_step": 528, "train_info/time_within_train_step": 26.28252339363098, "step": 528} +{"train_info/time_between_train_steps": 0.003878355026245117, "step": 528} +{"info/global_step": 529, "train_info/time_within_train_step": 26.29608416557312, "step": 529} +{"train_info/time_between_train_steps": 0.003931283950805664, "step": 529} +{"info/global_step": 530, "train_info/time_within_train_step": 26.281938076019287, "step": 530} +{"train_info/time_between_train_steps": 0.0038971900939941406, "step": 530} +{"info/global_step": 531, "train_info/time_within_train_step": 26.28525710105896, "step": 531} +{"train_info/time_between_train_steps": 0.003889799118041992, "step": 531} +{"info/global_step": 532, "train_info/time_within_train_step": 26.30498194694519, "step": 532} +{"train_info/time_between_train_steps": 0.0038971900939941406, "step": 532} +{"info/global_step": 533, "train_info/time_within_train_step": 26.300567150115967, "step": 533} +{"train_info/time_between_train_steps": 0.003904581069946289, "step": 533} +{"info/global_step": 534, "train_info/time_within_train_step": 26.283340454101562, "step": 534} +{"train_info/time_between_train_steps": 0.00386810302734375, "step": 534} +{"info/global_step": 535, "train_info/time_within_train_step": 26.28324294090271, "step": 535} +{"train_info/time_between_train_steps": 0.004021883010864258, "step": 535} +{"info/global_step": 536, "train_info/time_within_train_step": 26.30966329574585, "step": 536} +{"train_info/time_between_train_steps": 0.004050493240356445, "step": 536} +{"info/global_step": 537, "train_info/time_within_train_step": 26.28920030593872, "step": 537} +{"train_info/time_between_train_steps": 0.004029989242553711, "step": 537} +{"info/global_step": 538, "train_info/time_within_train_step": 26.306071758270264, "step": 538} +{"train_info/time_between_train_steps": 0.004137992858886719, "step": 538} +{"info/global_step": 539, "train_info/time_within_train_step": 26.291159868240356, "step": 539} +{"train_info/time_between_train_steps": 0.004662752151489258, "step": 539} +{"info/global_step": 540, "train_info/time_within_train_step": 26.392902135849, "step": 540} +{"train_info/time_between_train_steps": 0.0048046112060546875, "step": 540} +{"train_info/time_between_train_steps": 16.90623140335083, "step": 540} +{"info/global_step": 541, "train_info/time_within_train_step": 26.260899782180786, "step": 541} +{"train_info/time_between_train_steps": 0.003728151321411133, "step": 541} +{"info/global_step": 542, "train_info/time_within_train_step": 26.32840871810913, "step": 542} +{"train_info/time_between_train_steps": 0.0037157535552978516, "step": 542} +{"info/global_step": 543, "train_info/time_within_train_step": 26.28153157234192, "step": 543} +{"train_info/time_between_train_steps": 0.004155874252319336, "step": 543} +{"info/global_step": 544, "train_info/time_within_train_step": 26.38822317123413, "step": 544} +{"train_info/time_between_train_steps": 0.003965854644775391, "step": 544} +{"info/global_step": 545, "train_info/time_within_train_step": 26.266066789627075, "step": 545} +{"train_info/time_between_train_steps": 0.004029273986816406, "step": 545} +{"info/global_step": 546, "train_info/time_within_train_step": 26.370834827423096, "step": 546} +{"train_info/time_between_train_steps": 0.0038840770721435547, "step": 546} +{"info/global_step": 547, "train_info/time_within_train_step": 26.27644443511963, "step": 547} +{"train_info/time_between_train_steps": 0.0040051937103271484, "step": 547} +{"info/global_step": 548, "train_info/time_within_train_step": 26.413436889648438, "step": 548} +{"train_info/time_between_train_steps": 0.003991127014160156, "step": 548} +{"info/global_step": 549, "train_info/time_within_train_step": 26.260321855545044, "step": 549} +{"train_info/time_between_train_steps": 0.003886699676513672, "step": 549} +{"info/global_step": 550, "train_info/time_within_train_step": 26.355294227600098, "step": 550} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18056.2958984375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740940483, "_runtime": 14989}, "step": 550} +{"logs": {"train/loss": 3.1962, "train/learning_rate": 0.0003611111111111111, "train/epoch": 15.01, "_timestamp": 1740940483, "_runtime": 14989}, "step": 550} +{"train_info/time_between_train_steps": 0.024851322174072266, "step": 550} +{"info/global_step": 551, "train_info/time_within_train_step": 26.307183742523193, "step": 551} +{"train_info/time_between_train_steps": 0.004088401794433594, "step": 551} +{"info/global_step": 552, "train_info/time_within_train_step": 26.365095853805542, "step": 552} +{"train_info/time_between_train_steps": 0.004036426544189453, "step": 552} +{"info/global_step": 553, "train_info/time_within_train_step": 26.35359263420105, "step": 553} +{"train_info/time_between_train_steps": 0.0038480758666992188, "step": 553} +{"info/global_step": 554, "train_info/time_within_train_step": 26.38925814628601, "step": 554} +{"train_info/time_between_train_steps": 0.003989696502685547, "step": 554} +{"info/global_step": 555, "train_info/time_within_train_step": 26.33761692047119, "step": 555} +{"train_info/time_between_train_steps": 0.003942012786865234, "step": 555} +{"info/global_step": 556, "train_info/time_within_train_step": 26.38613796234131, "step": 556} +{"train_info/time_between_train_steps": 0.003977775573730469, "step": 556} +{"info/global_step": 557, "train_info/time_within_train_step": 26.27562713623047, "step": 557} +{"train_info/time_between_train_steps": 0.017357349395751953, "step": 557} +{"info/global_step": 558, "train_info/time_within_train_step": 26.261228561401367, "step": 558} +{"train_info/time_between_train_steps": 0.003832101821899414, "step": 558} +{"info/global_step": 559, "train_info/time_within_train_step": 26.26491618156433, "step": 559} +{"train_info/time_between_train_steps": 0.003971099853515625, "step": 559} +{"info/global_step": 560, "train_info/time_within_train_step": 26.299482107162476, "step": 560} +{"train_info/time_between_train_steps": 0.003808259963989258, "step": 560} +{"info/global_step": 561, "train_info/time_within_train_step": 26.288994550704956, "step": 561} +{"train_info/time_between_train_steps": 0.0038480758666992188, "step": 561} +{"info/global_step": 562, "train_info/time_within_train_step": 26.293443202972412, "step": 562} +{"train_info/time_between_train_steps": 0.0038750171661376953, "step": 562} +{"info/global_step": 563, "train_info/time_within_train_step": 26.297470092773438, "step": 563} +{"train_info/time_between_train_steps": 0.0038521289825439453, "step": 563} +{"info/global_step": 564, "train_info/time_within_train_step": 26.28997015953064, "step": 564} +{"train_info/time_between_train_steps": 0.0038373470306396484, "step": 564} +{"info/global_step": 565, "train_info/time_within_train_step": 26.303242683410645, "step": 565} +{"train_info/time_between_train_steps": 0.003950357437133789, "step": 565} +{"info/global_step": 566, "train_info/time_within_train_step": 26.27607560157776, "step": 566} +{"train_info/time_between_train_steps": 0.003907680511474609, "step": 566} +{"info/global_step": 567, "train_info/time_within_train_step": 26.3220694065094, "step": 567} +{"train_info/time_between_train_steps": 0.003848552703857422, "step": 567} +{"info/global_step": 568, "train_info/time_within_train_step": 26.269208192825317, "step": 568} +{"train_info/time_between_train_steps": 0.003996372222900391, "step": 568} +{"info/global_step": 569, "train_info/time_within_train_step": 26.298341274261475, "step": 569} +{"train_info/time_between_train_steps": 0.0038356781005859375, "step": 569} +{"info/global_step": 570, "train_info/time_within_train_step": 26.355881690979004, "step": 570} +{"train_info/time_between_train_steps": 0.0038089752197265625, "step": 570} +{"info/global_step": 571, "train_info/time_within_train_step": 26.262640714645386, "step": 571} +{"train_info/time_between_train_steps": 0.0038928985595703125, "step": 571} +{"info/global_step": 572, "train_info/time_within_train_step": 28.45241403579712, "step": 572} +{"train_info/time_between_train_steps": 0.00404810905456543, "step": 572} +{"info/global_step": 573, "train_info/time_within_train_step": 28.927043676376343, "step": 573} +{"train_info/time_between_train_steps": 0.004183530807495117, "step": 573} +{"info/global_step": 574, "train_info/time_within_train_step": 29.40468668937683, "step": 574} +{"train_info/time_between_train_steps": 0.004532814025878906, "step": 574} +{"info/global_step": 575, "train_info/time_within_train_step": 28.613139390945435, "step": 575} +{"train_info/time_between_train_steps": 0.0046122074127197266, "step": 575} +{"info/global_step": 576, "train_info/time_within_train_step": 29.191765546798706, "step": 576} +{"train_info/time_between_train_steps": 0.0046215057373046875, "step": 576} +{"train_info/time_between_train_steps": 16.549514055252075, "step": 576} +{"info/global_step": 577, "train_info/time_within_train_step": 26.252597093582153, "step": 577} +{"train_info/time_between_train_steps": 0.003695964813232422, "step": 577} +{"info/global_step": 578, "train_info/time_within_train_step": 26.331225156784058, "step": 578} +{"train_info/time_between_train_steps": 0.003709077835083008, "step": 578} +{"info/global_step": 579, "train_info/time_within_train_step": 26.26661705970764, "step": 579} +{"train_info/time_between_train_steps": 0.003830432891845703, "step": 579} +{"info/global_step": 580, "train_info/time_within_train_step": 26.37618112564087, "step": 580} +{"train_info/time_between_train_steps": 0.003881216049194336, "step": 580} +{"info/global_step": 581, "train_info/time_within_train_step": 26.270763635635376, "step": 581} +{"train_info/time_between_train_steps": 0.0038671493530273438, "step": 581} +{"info/global_step": 582, "train_info/time_within_train_step": 26.35205578804016, "step": 582} +{"train_info/time_between_train_steps": 0.003930091857910156, "step": 582} +{"info/global_step": 583, "train_info/time_within_train_step": 26.272614240646362, "step": 583} +{"train_info/time_between_train_steps": 0.0038824081420898438, "step": 583} +{"info/global_step": 584, "train_info/time_within_train_step": 26.377257823944092, "step": 584} +{"train_info/time_between_train_steps": 0.0038993358612060547, "step": 584} +{"info/global_step": 585, "train_info/time_within_train_step": 26.329920768737793, "step": 585} +{"train_info/time_between_train_steps": 0.0038635730743408203, "step": 585} +{"info/global_step": 586, "train_info/time_within_train_step": 26.36434316635132, "step": 586} +{"train_info/time_between_train_steps": 0.003981828689575195, "step": 586} +{"info/global_step": 587, "train_info/time_within_train_step": 26.2805814743042, "step": 587} +{"train_info/time_between_train_steps": 0.00405573844909668, "step": 587} +{"info/global_step": 588, "train_info/time_within_train_step": 26.35051417350769, "step": 588} +{"train_info/time_between_train_steps": 0.004008293151855469, "step": 588} +{"info/global_step": 589, "train_info/time_within_train_step": 26.267319440841675, "step": 589} +{"train_info/time_between_train_steps": 0.003976583480834961, "step": 589} +{"info/global_step": 590, "train_info/time_within_train_step": 26.362786531448364, "step": 590} +{"train_info/time_between_train_steps": 0.00382232666015625, "step": 590} +{"info/global_step": 591, "train_info/time_within_train_step": 26.40007472038269, "step": 591} +{"train_info/time_between_train_steps": 0.0037784576416015625, "step": 591} +{"info/global_step": 592, "train_info/time_within_train_step": 26.408693313598633, "step": 592} +{"train_info/time_between_train_steps": 0.003947734832763672, "step": 592} +{"info/global_step": 593, "train_info/time_within_train_step": 26.32305121421814, "step": 593} +{"train_info/time_between_train_steps": 0.01463007926940918, "step": 593} +{"info/global_step": 594, "train_info/time_within_train_step": 26.267627000808716, "step": 594} +{"train_info/time_between_train_steps": 0.003687143325805664, "step": 594} +{"info/global_step": 595, "train_info/time_within_train_step": 26.294771671295166, "step": 595} +{"train_info/time_between_train_steps": 0.003713369369506836, "step": 595} +{"info/global_step": 596, "train_info/time_within_train_step": 26.303279399871826, "step": 596} +{"train_info/time_between_train_steps": 0.005456686019897461, "step": 596} +{"info/global_step": 597, "train_info/time_within_train_step": 26.298689603805542, "step": 597} +{"train_info/time_between_train_steps": 0.0037937164306640625, "step": 597} +{"info/global_step": 598, "train_info/time_within_train_step": 26.292630672454834, "step": 598} +{"train_info/time_between_train_steps": 0.0038003921508789062, "step": 598} +{"info/global_step": 599, "train_info/time_within_train_step": 26.275687217712402, "step": 599} +{"train_info/time_between_train_steps": 0.0038940906524658203, "step": 599} +{"info/global_step": 600, "train_info/time_within_train_step": 26.31163763999939, "step": 600} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18056.2958984375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740941838, "_runtime": 16344}, "step": 600} +{"logs": {"train/loss": 3.0903, "train/learning_rate": 0.0003333333333333333, "train/epoch": 16.02, "_timestamp": 1740941838, "_runtime": 16344}, "step": 600} +{"train_info/time_between_train_steps": 115.62986183166504, "step": 600} +{"info/global_step": 601, "train_info/time_within_train_step": 26.434001922607422, "step": 601} +{"train_info/time_between_train_steps": 0.00401759147644043, "step": 601} +{"info/global_step": 602, "train_info/time_within_train_step": 28.69110679626465, "step": 602} +{"train_info/time_between_train_steps": 0.004048824310302734, "step": 602} +{"info/global_step": 603, "train_info/time_within_train_step": 26.781887531280518, "step": 603} +{"train_info/time_between_train_steps": 0.003911495208740234, "step": 603} +{"info/global_step": 604, "train_info/time_within_train_step": 26.346909761428833, "step": 604} +{"train_info/time_between_train_steps": 0.003926277160644531, "step": 604} +{"info/global_step": 605, "train_info/time_within_train_step": 26.287867069244385, "step": 605} +{"train_info/time_between_train_steps": 0.0038177967071533203, "step": 605} +{"info/global_step": 606, "train_info/time_within_train_step": 26.277366399765015, "step": 606} +{"train_info/time_between_train_steps": 0.0037848949432373047, "step": 606} +{"info/global_step": 607, "train_info/time_within_train_step": 26.257330656051636, "step": 607} +{"train_info/time_between_train_steps": 0.0038678646087646484, "step": 607} +{"info/global_step": 608, "train_info/time_within_train_step": 26.269975185394287, "step": 608} +{"train_info/time_between_train_steps": 0.0038640499114990234, "step": 608} +{"info/global_step": 609, "train_info/time_within_train_step": 26.30725121498108, "step": 609} +{"train_info/time_between_train_steps": 0.004083156585693359, "step": 609} +{"info/global_step": 610, "train_info/time_within_train_step": 26.27578568458557, "step": 610} +{"train_info/time_between_train_steps": 0.004180431365966797, "step": 610} +{"info/global_step": 611, "train_info/time_within_train_step": 26.312291383743286, "step": 611} +{"train_info/time_between_train_steps": 0.00430750846862793, "step": 611} +{"info/global_step": 612, "train_info/time_within_train_step": 26.298668384552002, "step": 612} +{"train_info/time_between_train_steps": 0.00468134880065918, "step": 612} +{"train_info/time_between_train_steps": 16.31327247619629, "step": 612} +{"info/global_step": 613, "train_info/time_within_train_step": 26.26223874092102, "step": 613} +{"train_info/time_between_train_steps": 0.003696918487548828, "step": 613} +{"info/global_step": 614, "train_info/time_within_train_step": 26.33203935623169, "step": 614} +{"train_info/time_between_train_steps": 0.0037751197814941406, "step": 614} +{"info/global_step": 615, "train_info/time_within_train_step": 26.27633023262024, "step": 615} +{"train_info/time_between_train_steps": 0.0036873817443847656, "step": 615} +{"info/global_step": 616, "train_info/time_within_train_step": 26.393662929534912, "step": 616} +{"train_info/time_between_train_steps": 0.0037512779235839844, "step": 616} +{"info/global_step": 617, "train_info/time_within_train_step": 26.29020047187805, "step": 617} +{"train_info/time_between_train_steps": 0.0037245750427246094, "step": 617} +{"info/global_step": 618, "train_info/time_within_train_step": 26.346272706985474, "step": 618} +{"train_info/time_between_train_steps": 0.003765583038330078, "step": 618} +{"info/global_step": 619, "train_info/time_within_train_step": 26.292070627212524, "step": 619} +{"train_info/time_between_train_steps": 0.0038967132568359375, "step": 619} +{"info/global_step": 620, "train_info/time_within_train_step": 26.37185025215149, "step": 620} +{"train_info/time_between_train_steps": 0.0038650035858154297, "step": 620} +{"info/global_step": 621, "train_info/time_within_train_step": 26.257794857025146, "step": 621} +{"train_info/time_between_train_steps": 0.003839254379272461, "step": 621} +{"info/global_step": 622, "train_info/time_within_train_step": 26.36053991317749, "step": 622} +{"train_info/time_between_train_steps": 0.003924369812011719, "step": 622} +{"info/global_step": 623, "train_info/time_within_train_step": 26.267229080200195, "step": 623} +{"train_info/time_between_train_steps": 0.0038819313049316406, "step": 623} +{"info/global_step": 624, "train_info/time_within_train_step": 26.349353075027466, "step": 624} +{"train_info/time_between_train_steps": 0.003936767578125, "step": 624} +{"info/global_step": 625, "train_info/time_within_train_step": 26.25895094871521, "step": 625} +{"train_info/time_between_train_steps": 0.003956317901611328, "step": 625} +{"info/global_step": 626, "train_info/time_within_train_step": 26.35849928855896, "step": 626} +{"train_info/time_between_train_steps": 0.003945589065551758, "step": 626} +{"info/global_step": 627, "train_info/time_within_train_step": 26.266907453536987, "step": 627} +{"train_info/time_between_train_steps": 0.003953218460083008, "step": 627} +{"info/global_step": 628, "train_info/time_within_train_step": 26.34054923057556, "step": 628} +{"train_info/time_between_train_steps": 0.003966808319091797, "step": 628} +{"info/global_step": 629, "train_info/time_within_train_step": 26.265522718429565, "step": 629} +{"train_info/time_between_train_steps": 0.015276432037353516, "step": 629} +{"info/global_step": 630, "train_info/time_within_train_step": 26.255936861038208, "step": 630} +{"train_info/time_between_train_steps": 0.0036978721618652344, "step": 630} +{"info/global_step": 631, "train_info/time_within_train_step": 26.254347562789917, "step": 631} +{"train_info/time_between_train_steps": 0.0037088394165039062, "step": 631} +{"info/global_step": 632, "train_info/time_within_train_step": 26.312943696975708, "step": 632} +{"train_info/time_between_train_steps": 0.0037429332733154297, "step": 632} +{"info/global_step": 633, "train_info/time_within_train_step": 26.287460803985596, "step": 633} +{"train_info/time_between_train_steps": 0.003793954849243164, "step": 633} +{"info/global_step": 634, "train_info/time_within_train_step": 26.26660132408142, "step": 634} +{"train_info/time_between_train_steps": 0.0038361549377441406, "step": 634} +{"info/global_step": 635, "train_info/time_within_train_step": 26.29890465736389, "step": 635} +{"train_info/time_between_train_steps": 0.003863096237182617, "step": 635} +{"info/global_step": 636, "train_info/time_within_train_step": 26.301360607147217, "step": 636} +{"train_info/time_between_train_steps": 0.003873586654663086, "step": 636} +{"info/global_step": 637, "train_info/time_within_train_step": 26.265519380569458, "step": 637} +{"train_info/time_between_train_steps": 0.003881216049194336, "step": 637} +{"info/global_step": 638, "train_info/time_within_train_step": 26.322997331619263, "step": 638} +{"train_info/time_between_train_steps": 0.0038025379180908203, "step": 638} +{"info/global_step": 639, "train_info/time_within_train_step": 26.269788026809692, "step": 639} +{"train_info/time_between_train_steps": 0.0038383007049560547, "step": 639} +{"info/global_step": 640, "train_info/time_within_train_step": 26.2961368560791, "step": 640} +{"train_info/time_between_train_steps": 0.0038394927978515625, "step": 640} +{"info/global_step": 641, "train_info/time_within_train_step": 26.30130624771118, "step": 641} +{"train_info/time_between_train_steps": 0.003942012786865234, "step": 641} +{"info/global_step": 642, "train_info/time_within_train_step": 26.29426598548889, "step": 642} +{"train_info/time_between_train_steps": 0.003797769546508789, "step": 642} +{"info/global_step": 643, "train_info/time_within_train_step": 26.293027639389038, "step": 643} +{"train_info/time_between_train_steps": 0.0038154125213623047, "step": 643} +{"info/global_step": 644, "train_info/time_within_train_step": 26.291371822357178, "step": 644} +{"train_info/time_between_train_steps": 0.004014253616333008, "step": 644} +{"info/global_step": 645, "train_info/time_within_train_step": 26.300256490707397, "step": 645} +{"train_info/time_between_train_steps": 0.0038836002349853516, "step": 645} +{"info/global_step": 646, "train_info/time_within_train_step": 26.275596857070923, "step": 646} +{"train_info/time_between_train_steps": 0.004086732864379883, "step": 646} +{"info/global_step": 647, "train_info/time_within_train_step": 26.37663769721985, "step": 647} +{"train_info/time_between_train_steps": 0.004442453384399414, "step": 647} +{"info/global_step": 648, "train_info/time_within_train_step": 26.3019859790802, "step": 648} +{"train_info/time_between_train_steps": 0.004499673843383789, "step": 648} +{"train_info/time_between_train_steps": 16.42504596710205, "step": 648} +{"info/global_step": 649, "train_info/time_within_train_step": 26.290907382965088, "step": 649} +{"train_info/time_between_train_steps": 0.0037636756896972656, "step": 649} +{"info/global_step": 650, "train_info/time_within_train_step": 26.330177545547485, "step": 650} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18056.2958984375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740943309, "_runtime": 17815}, "step": 650} +{"logs": {"train/loss": 3.0601, "train/learning_rate": 0.00030555555555555555, "train/epoch": 18.0, "_timestamp": 1740943309, "_runtime": 17815}, "step": 650} +{"train_info/time_between_train_steps": 0.04331564903259277, "step": 650} +{"info/global_step": 651, "train_info/time_within_train_step": 26.295965433120728, "step": 651} +{"train_info/time_between_train_steps": 0.0036764144897460938, "step": 651} +{"info/global_step": 652, "train_info/time_within_train_step": 26.334989309310913, "step": 652} +{"train_info/time_between_train_steps": 0.0038018226623535156, "step": 652} +{"info/global_step": 653, "train_info/time_within_train_step": 26.263429641723633, "step": 653} +{"train_info/time_between_train_steps": 0.005986928939819336, "step": 653} +{"info/global_step": 654, "train_info/time_within_train_step": 26.36741876602173, "step": 654} +{"train_info/time_between_train_steps": 0.006015300750732422, "step": 654} +{"info/global_step": 655, "train_info/time_within_train_step": 26.27382779121399, "step": 655} +{"train_info/time_between_train_steps": 0.005713462829589844, "step": 655} +{"info/global_step": 656, "train_info/time_within_train_step": 26.347060441970825, "step": 656} +{"train_info/time_between_train_steps": 0.00582575798034668, "step": 656} +{"info/global_step": 657, "train_info/time_within_train_step": 26.261723279953003, "step": 657} +{"train_info/time_between_train_steps": 0.005723476409912109, "step": 657} +{"info/global_step": 658, "train_info/time_within_train_step": 26.35244107246399, "step": 658} +{"train_info/time_between_train_steps": 0.0037996768951416016, "step": 658} +{"info/global_step": 659, "train_info/time_within_train_step": 26.265893697738647, "step": 659} +{"train_info/time_between_train_steps": 0.0037450790405273438, "step": 659} +{"info/global_step": 660, "train_info/time_within_train_step": 26.37249994277954, "step": 660} +{"train_info/time_between_train_steps": 0.005650997161865234, "step": 660} +{"info/global_step": 661, "train_info/time_within_train_step": 26.264864206314087, "step": 661} +{"train_info/time_between_train_steps": 0.004036664962768555, "step": 661} +{"info/global_step": 662, "train_info/time_within_train_step": 26.360647678375244, "step": 662} +{"train_info/time_between_train_steps": 0.0040051937103271484, "step": 662} +{"info/global_step": 663, "train_info/time_within_train_step": 26.329755783081055, "step": 663} +{"train_info/time_between_train_steps": 0.0038416385650634766, "step": 663} +{"info/global_step": 664, "train_info/time_within_train_step": 26.347329139709473, "step": 664} +{"train_info/time_between_train_steps": 0.003950834274291992, "step": 664} +{"info/global_step": 665, "train_info/time_within_train_step": 26.273025035858154, "step": 665} +{"train_info/time_between_train_steps": 0.014797449111938477, "step": 665} +{"info/global_step": 666, "train_info/time_within_train_step": 26.25707507133484, "step": 666} +{"train_info/time_between_train_steps": 0.0036754608154296875, "step": 666} +{"info/global_step": 667, "train_info/time_within_train_step": 26.251194715499878, "step": 667} +{"train_info/time_between_train_steps": 0.0038590431213378906, "step": 667} +{"info/global_step": 668, "train_info/time_within_train_step": 26.255577564239502, "step": 668} +{"train_info/time_between_train_steps": 0.003865480422973633, "step": 668} +{"info/global_step": 669, "train_info/time_within_train_step": 26.257568836212158, "step": 669} +{"train_info/time_between_train_steps": 0.003979206085205078, "step": 669} +{"info/global_step": 670, "train_info/time_within_train_step": 26.256871223449707, "step": 670} +{"train_info/time_between_train_steps": 0.0037870407104492188, "step": 670} +{"info/global_step": 671, "train_info/time_within_train_step": 28.565838098526, "step": 671} +{"train_info/time_between_train_steps": 0.003998994827270508, "step": 671} +{"info/global_step": 672, "train_info/time_within_train_step": 29.41464066505432, "step": 672} +{"train_info/time_between_train_steps": 0.004072666168212891, "step": 672} +{"info/global_step": 673, "train_info/time_within_train_step": 28.605728149414062, "step": 673} +{"train_info/time_between_train_steps": 0.004069089889526367, "step": 673} +{"info/global_step": 674, "train_info/time_within_train_step": 29.30006170272827, "step": 674} +{"train_info/time_between_train_steps": 0.00413060188293457, "step": 674} +{"info/global_step": 675, "train_info/time_within_train_step": 27.396986484527588, "step": 675} +{"train_info/time_between_train_steps": 0.004195213317871094, "step": 675} +{"info/global_step": 676, "train_info/time_within_train_step": 28.196874380111694, "step": 676} +{"train_info/time_between_train_steps": 0.003796815872192383, "step": 676} +{"info/global_step": 677, "train_info/time_within_train_step": 26.27568030357361, "step": 677} +{"train_info/time_between_train_steps": 0.003891468048095703, "step": 677} +{"info/global_step": 678, "train_info/time_within_train_step": 26.334126234054565, "step": 678} +{"train_info/time_between_train_steps": 0.003819704055786133, "step": 678} +{"info/global_step": 679, "train_info/time_within_train_step": 26.258179664611816, "step": 679} +{"train_info/time_between_train_steps": 0.0038585662841796875, "step": 679} +{"info/global_step": 680, "train_info/time_within_train_step": 26.321382522583008, "step": 680} +{"train_info/time_between_train_steps": 0.0039522647857666016, "step": 680} +{"info/global_step": 681, "train_info/time_within_train_step": 26.271158933639526, "step": 681} +{"train_info/time_between_train_steps": 0.004072666168212891, "step": 681} +{"info/global_step": 682, "train_info/time_within_train_step": 26.31499171257019, "step": 682} +{"train_info/time_between_train_steps": 0.004196882247924805, "step": 682} +{"info/global_step": 683, "train_info/time_within_train_step": 26.308403730392456, "step": 683} +{"train_info/time_between_train_steps": 0.004433393478393555, "step": 683} +{"info/global_step": 684, "train_info/time_within_train_step": 26.30239748954773, "step": 684} +{"train_info/time_between_train_steps": 0.004465579986572266, "step": 684} +{"train_info/time_between_train_steps": 16.569295406341553, "step": 684} +{"info/global_step": 685, "train_info/time_within_train_step": 26.25857448577881, "step": 685} +{"train_info/time_between_train_steps": 0.0036890506744384766, "step": 685} +{"info/global_step": 686, "train_info/time_within_train_step": 26.354584455490112, "step": 686} +{"train_info/time_between_train_steps": 0.003741741180419922, "step": 686} +{"info/global_step": 687, "train_info/time_within_train_step": 26.254604816436768, "step": 687} +{"train_info/time_between_train_steps": 0.0036973953247070312, "step": 687} +{"info/global_step": 688, "train_info/time_within_train_step": 26.407933712005615, "step": 688} +{"train_info/time_between_train_steps": 0.0038919448852539062, "step": 688} +{"info/global_step": 689, "train_info/time_within_train_step": 26.276859521865845, "step": 689} +{"train_info/time_between_train_steps": 0.004034757614135742, "step": 689} +{"info/global_step": 690, "train_info/time_within_train_step": 26.35619330406189, "step": 690} +{"train_info/time_between_train_steps": 0.004064083099365234, "step": 690} +{"info/global_step": 691, "train_info/time_within_train_step": 26.264286279678345, "step": 691} +{"train_info/time_between_train_steps": 0.0039033889770507812, "step": 691} +{"info/global_step": 692, "train_info/time_within_train_step": 26.359114408493042, "step": 692} +{"train_info/time_between_train_steps": 0.003893613815307617, "step": 692} +{"info/global_step": 693, "train_info/time_within_train_step": 26.348825693130493, "step": 693} +{"train_info/time_between_train_steps": 0.0039520263671875, "step": 693} +{"info/global_step": 694, "train_info/time_within_train_step": 26.70827317237854, "step": 694} +{"train_info/time_between_train_steps": 0.004185676574707031, "step": 694} +{"info/global_step": 695, "train_info/time_within_train_step": 28.14995789527893, "step": 695} +{"train_info/time_between_train_steps": 0.0041506290435791016, "step": 695} +{"info/global_step": 696, "train_info/time_within_train_step": 27.368081092834473, "step": 696} +{"train_info/time_between_train_steps": 0.004077911376953125, "step": 696} +{"info/global_step": 697, "train_info/time_within_train_step": 26.3475239276886, "step": 697} +{"train_info/time_between_train_steps": 0.0041065216064453125, "step": 697} +{"info/global_step": 698, "train_info/time_within_train_step": 26.410454511642456, "step": 698} +{"train_info/time_between_train_steps": 0.00406956672668457, "step": 698} +{"info/global_step": 699, "train_info/time_within_train_step": 26.289648294448853, "step": 699} +{"train_info/time_between_train_steps": 0.003977537155151367, "step": 699} +{"info/global_step": 700, "train_info/time_within_train_step": 26.38025951385498, "step": 700} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18056.2958984375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740944669, "_runtime": 19175}, "step": 700} +{"logs": {"train/loss": 2.9726, "train/learning_rate": 0.0002777777777777778, "train/epoch": 19.01, "_timestamp": 1740944669, "_runtime": 19175}, "step": 700} +{"train_info/time_between_train_steps": 103.28658771514893, "step": 700} +{"info/global_step": 701, "train_info/time_within_train_step": 26.408254146575928, "step": 701} +{"train_info/time_between_train_steps": 0.0194547176361084, "step": 701} +{"info/global_step": 702, "train_info/time_within_train_step": 26.417887449264526, "step": 702} +{"train_info/time_between_train_steps": 0.0038957595825195312, "step": 702} +{"info/global_step": 703, "train_info/time_within_train_step": 26.396651029586792, "step": 703} +{"train_info/time_between_train_steps": 0.0037658214569091797, "step": 703} +{"info/global_step": 704, "train_info/time_within_train_step": 26.37816333770752, "step": 704} +{"train_info/time_between_train_steps": 0.0037887096405029297, "step": 704} +{"info/global_step": 705, "train_info/time_within_train_step": 26.331677436828613, "step": 705} +{"train_info/time_between_train_steps": 0.003890514373779297, "step": 705} +{"info/global_step": 706, "train_info/time_within_train_step": 26.26013422012329, "step": 706} +{"train_info/time_between_train_steps": 0.003852367401123047, "step": 706} +{"info/global_step": 707, "train_info/time_within_train_step": 26.288859605789185, "step": 707} +{"train_info/time_between_train_steps": 0.0039637088775634766, "step": 707} +{"info/global_step": 708, "train_info/time_within_train_step": 26.279316186904907, "step": 708} +{"train_info/time_between_train_steps": 0.0038802623748779297, "step": 708} +{"info/global_step": 709, "train_info/time_within_train_step": 26.32910132408142, "step": 709} +{"train_info/time_between_train_steps": 0.00390934944152832, "step": 709} +{"info/global_step": 710, "train_info/time_within_train_step": 26.295532941818237, "step": 710} +{"train_info/time_between_train_steps": 0.003826141357421875, "step": 710} +{"info/global_step": 711, "train_info/time_within_train_step": 26.2899112701416, "step": 711} +{"train_info/time_between_train_steps": 0.003875255584716797, "step": 711} +{"info/global_step": 712, "train_info/time_within_train_step": 26.30981683731079, "step": 712} +{"train_info/time_between_train_steps": 0.0038306713104248047, "step": 712} +{"info/global_step": 713, "train_info/time_within_train_step": 26.258656978607178, "step": 713} +{"train_info/time_between_train_steps": 0.0038313865661621094, "step": 713} +{"info/global_step": 714, "train_info/time_within_train_step": 26.33165168762207, "step": 714} +{"train_info/time_between_train_steps": 0.0038089752197265625, "step": 714} +{"info/global_step": 715, "train_info/time_within_train_step": 26.27479600906372, "step": 715} +{"train_info/time_between_train_steps": 0.0038764476776123047, "step": 715} +{"info/global_step": 716, "train_info/time_within_train_step": 26.29524040222168, "step": 716} +{"train_info/time_between_train_steps": 0.003952741622924805, "step": 716} +{"info/global_step": 717, "train_info/time_within_train_step": 26.31106686592102, "step": 717} +{"train_info/time_between_train_steps": 0.003996133804321289, "step": 717} +{"info/global_step": 718, "train_info/time_within_train_step": 26.278234720230103, "step": 718} +{"train_info/time_between_train_steps": 0.004160404205322266, "step": 718} +{"info/global_step": 719, "train_info/time_within_train_step": 26.30810236930847, "step": 719} +{"train_info/time_between_train_steps": 0.0043523311614990234, "step": 719} +{"info/global_step": 720, "train_info/time_within_train_step": 26.31121826171875, "step": 720} +{"train_info/time_between_train_steps": 0.004580259323120117, "step": 720} +{"train_info/time_between_train_steps": 16.31877040863037, "step": 720} +{"info/global_step": 721, "train_info/time_within_train_step": 26.25040888786316, "step": 721} +{"train_info/time_between_train_steps": 0.0037496089935302734, "step": 721} +{"info/global_step": 722, "train_info/time_within_train_step": 26.33076524734497, "step": 722} +{"train_info/time_between_train_steps": 0.0038242340087890625, "step": 722} +{"info/global_step": 723, "train_info/time_within_train_step": 26.24876856803894, "step": 723} +{"train_info/time_between_train_steps": 0.003754138946533203, "step": 723} +{"info/global_step": 724, "train_info/time_within_train_step": 26.410770177841187, "step": 724} +{"train_info/time_between_train_steps": 0.0038700103759765625, "step": 724} +{"info/global_step": 725, "train_info/time_within_train_step": 26.26077175140381, "step": 725} +{"train_info/time_between_train_steps": 0.003826141357421875, "step": 725} +{"info/global_step": 726, "train_info/time_within_train_step": 26.347083806991577, "step": 726} +{"train_info/time_between_train_steps": 0.003881216049194336, "step": 726} +{"info/global_step": 727, "train_info/time_within_train_step": 26.29231858253479, "step": 727} +{"train_info/time_between_train_steps": 0.003927707672119141, "step": 727} +{"info/global_step": 728, "train_info/time_within_train_step": 26.362375736236572, "step": 728} +{"train_info/time_between_train_steps": 0.003916263580322266, "step": 728} +{"info/global_step": 729, "train_info/time_within_train_step": 26.27252721786499, "step": 729} +{"train_info/time_between_train_steps": 0.003942012786865234, "step": 729} +{"info/global_step": 730, "train_info/time_within_train_step": 26.35464310646057, "step": 730} +{"train_info/time_between_train_steps": 0.003832101821899414, "step": 730} +{"info/global_step": 731, "train_info/time_within_train_step": 26.307148218154907, "step": 731} +{"train_info/time_between_train_steps": 0.003928184509277344, "step": 731} +{"info/global_step": 732, "train_info/time_within_train_step": 26.35213851928711, "step": 732} +{"train_info/time_between_train_steps": 0.003979921340942383, "step": 732} +{"info/global_step": 733, "train_info/time_within_train_step": 26.284088611602783, "step": 733} +{"train_info/time_between_train_steps": 0.004032135009765625, "step": 733} +{"info/global_step": 734, "train_info/time_within_train_step": 26.404606580734253, "step": 734} +{"train_info/time_between_train_steps": 0.003918170928955078, "step": 734} +{"info/global_step": 735, "train_info/time_within_train_step": 26.282552003860474, "step": 735} +{"train_info/time_between_train_steps": 0.003926753997802734, "step": 735} +{"info/global_step": 736, "train_info/time_within_train_step": 26.36944007873535, "step": 736} +{"train_info/time_between_train_steps": 0.004001140594482422, "step": 736} +{"info/global_step": 737, "train_info/time_within_train_step": 26.29708480834961, "step": 737} +{"train_info/time_between_train_steps": 0.014623165130615234, "step": 737} +{"info/global_step": 738, "train_info/time_within_train_step": 26.26643204689026, "step": 738} +{"train_info/time_between_train_steps": 0.003696918487548828, "step": 738} +{"info/global_step": 739, "train_info/time_within_train_step": 26.292712688446045, "step": 739} +{"train_info/time_between_train_steps": 0.003823518753051758, "step": 739} +{"info/global_step": 740, "train_info/time_within_train_step": 26.329716205596924, "step": 740} +{"train_info/time_between_train_steps": 0.0051233768463134766, "step": 740} +{"info/global_step": 741, "train_info/time_within_train_step": 26.314059019088745, "step": 741} +{"train_info/time_between_train_steps": 0.003994464874267578, "step": 741} +{"info/global_step": 742, "train_info/time_within_train_step": 26.27845311164856, "step": 742} +{"train_info/time_between_train_steps": 0.0037915706634521484, "step": 742} +{"info/global_step": 743, "train_info/time_within_train_step": 26.328334093093872, "step": 743} +{"train_info/time_between_train_steps": 0.0039119720458984375, "step": 743} +{"info/global_step": 744, "train_info/time_within_train_step": 26.272163152694702, "step": 744} +{"train_info/time_between_train_steps": 0.003835916519165039, "step": 744} +{"info/global_step": 745, "train_info/time_within_train_step": 26.292914390563965, "step": 745} +{"train_info/time_between_train_steps": 0.0038890838623046875, "step": 745} +{"info/global_step": 746, "train_info/time_within_train_step": 26.277397394180298, "step": 746} +{"train_info/time_between_train_steps": 0.003953218460083008, "step": 746} +{"info/global_step": 747, "train_info/time_within_train_step": 26.291822910308838, "step": 747} +{"train_info/time_between_train_steps": 0.003868579864501953, "step": 747} +{"info/global_step": 748, "train_info/time_within_train_step": 26.307687044143677, "step": 748} +{"train_info/time_between_train_steps": 0.003818035125732422, "step": 748} +{"info/global_step": 749, "train_info/time_within_train_step": 26.27634882926941, "step": 749} +{"train_info/time_between_train_steps": 0.003792285919189453, "step": 749} +{"info/global_step": 750, "train_info/time_within_train_step": 26.318910121917725, "step": 750} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18056.2958984375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740946106, "_runtime": 20612}, "step": 750} +{"logs": {"train/loss": 2.9202, "train/learning_rate": 0.00025, "train/epoch": 20.02, "_timestamp": 1740946106, "_runtime": 20612}, "step": 750} +{"train_info/time_between_train_steps": 0.024709463119506836, "step": 750} +{"info/global_step": 751, "train_info/time_within_train_step": 26.25811529159546, "step": 751} +{"train_info/time_between_train_steps": 0.003858804702758789, "step": 751} +{"info/global_step": 752, "train_info/time_within_train_step": 26.296481609344482, "step": 752} +{"train_info/time_between_train_steps": 0.0039632320404052734, "step": 752} +{"info/global_step": 753, "train_info/time_within_train_step": 26.280694723129272, "step": 753} +{"train_info/time_between_train_steps": 0.003968715667724609, "step": 753} +{"info/global_step": 754, "train_info/time_within_train_step": 26.29135274887085, "step": 754} +{"train_info/time_between_train_steps": 0.004039287567138672, "step": 754} +{"info/global_step": 755, "train_info/time_within_train_step": 26.37804388999939, "step": 755} +{"train_info/time_between_train_steps": 0.004372119903564453, "step": 755} +{"info/global_step": 756, "train_info/time_within_train_step": 26.30467987060547, "step": 756} +{"train_info/time_between_train_steps": 0.004595518112182617, "step": 756} +{"train_info/time_between_train_steps": 16.601550340652466, "step": 756} +{"info/global_step": 757, "train_info/time_within_train_step": 26.31273365020752, "step": 757} +{"train_info/time_between_train_steps": 0.00383758544921875, "step": 757} +{"info/global_step": 758, "train_info/time_within_train_step": 26.424458265304565, "step": 758} +{"train_info/time_between_train_steps": 0.0041027069091796875, "step": 758} +{"info/global_step": 759, "train_info/time_within_train_step": 26.365655422210693, "step": 759} +{"train_info/time_between_train_steps": 0.00398707389831543, "step": 759} +{"info/global_step": 760, "train_info/time_within_train_step": 26.50072932243347, "step": 760} +{"train_info/time_between_train_steps": 0.004046916961669922, "step": 760} +{"info/global_step": 761, "train_info/time_within_train_step": 26.3168523311615, "step": 761} +{"train_info/time_between_train_steps": 0.004070281982421875, "step": 761} +{"info/global_step": 762, "train_info/time_within_train_step": 26.404399871826172, "step": 762} +{"train_info/time_between_train_steps": 0.0039632320404052734, "step": 762} +{"info/global_step": 763, "train_info/time_within_train_step": 26.32519793510437, "step": 763} +{"train_info/time_between_train_steps": 0.004076480865478516, "step": 763} +{"info/global_step": 764, "train_info/time_within_train_step": 26.414039373397827, "step": 764} +{"train_info/time_between_train_steps": 0.004311323165893555, "step": 764} +{"info/global_step": 765, "train_info/time_within_train_step": 26.34158682823181, "step": 765} +{"train_info/time_between_train_steps": 0.005937099456787109, "step": 765} +{"info/global_step": 766, "train_info/time_within_train_step": 26.468576192855835, "step": 766} +{"train_info/time_between_train_steps": 0.004638671875, "step": 766} +{"info/global_step": 767, "train_info/time_within_train_step": 26.306243896484375, "step": 767} +{"train_info/time_between_train_steps": 0.004714012145996094, "step": 767} +{"info/global_step": 768, "train_info/time_within_train_step": 26.39190101623535, "step": 768} +{"train_info/time_between_train_steps": 0.0046918392181396484, "step": 768} +{"info/global_step": 769, "train_info/time_within_train_step": 26.313913345336914, "step": 769} +{"train_info/time_between_train_steps": 0.004720926284790039, "step": 769} +{"info/global_step": 770, "train_info/time_within_train_step": 26.46070122718811, "step": 770} +{"train_info/time_between_train_steps": 0.004344940185546875, "step": 770} +{"info/global_step": 771, "train_info/time_within_train_step": 26.441100120544434, "step": 771} +{"train_info/time_between_train_steps": 0.00436711311340332, "step": 771} +{"info/global_step": 772, "train_info/time_within_train_step": 26.498768091201782, "step": 772} +{"train_info/time_between_train_steps": 0.004973411560058594, "step": 772} +{"info/global_step": 773, "train_info/time_within_train_step": 26.306537866592407, "step": 773} +{"train_info/time_between_train_steps": 0.01996326446533203, "step": 773} +{"info/global_step": 774, "train_info/time_within_train_step": 27.052602529525757, "step": 774} +{"train_info/time_between_train_steps": 0.004276752471923828, "step": 774} +{"info/global_step": 775, "train_info/time_within_train_step": 26.278563499450684, "step": 775} +{"train_info/time_between_train_steps": 0.004130840301513672, "step": 775} +{"info/global_step": 776, "train_info/time_within_train_step": 26.299445629119873, "step": 776} +{"train_info/time_between_train_steps": 0.004185676574707031, "step": 776} +{"info/global_step": 777, "train_info/time_within_train_step": 26.307396411895752, "step": 777} +{"train_info/time_between_train_steps": 0.0043964385986328125, "step": 777} +{"info/global_step": 778, "train_info/time_within_train_step": 26.31987953186035, "step": 778} +{"train_info/time_between_train_steps": 0.0045626163482666016, "step": 778} +{"info/global_step": 779, "train_info/time_within_train_step": 26.494134426116943, "step": 779} +{"train_info/time_between_train_steps": 0.00441288948059082, "step": 779} +{"info/global_step": 780, "train_info/time_within_train_step": 26.285075187683105, "step": 780} +{"train_info/time_between_train_steps": 0.004447221755981445, "step": 780} +{"info/global_step": 781, "train_info/time_within_train_step": 26.305384874343872, "step": 781} +{"train_info/time_between_train_steps": 0.004107236862182617, "step": 781} +{"info/global_step": 782, "train_info/time_within_train_step": 26.28218936920166, "step": 782} +{"train_info/time_between_train_steps": 0.0041806697845458984, "step": 782} +{"info/global_step": 783, "train_info/time_within_train_step": 26.285902976989746, "step": 783} +{"train_info/time_between_train_steps": 0.004178285598754883, "step": 783} +{"info/global_step": 784, "train_info/time_within_train_step": 26.31264877319336, "step": 784} +{"train_info/time_between_train_steps": 0.004261016845703125, "step": 784} +{"info/global_step": 785, "train_info/time_within_train_step": 28.23668384552002, "step": 785} +{"train_info/time_between_train_steps": 0.004478931427001953, "step": 785} +{"info/global_step": 786, "train_info/time_within_train_step": 29.204496145248413, "step": 786} +{"train_info/time_between_train_steps": 0.004427194595336914, "step": 786} +{"info/global_step": 787, "train_info/time_within_train_step": 43.22826886177063, "step": 787} +{"train_info/time_between_train_steps": 0.004454374313354492, "step": 787} +{"info/global_step": 788, "train_info/time_within_train_step": 27.154877424240112, "step": 788} +{"train_info/time_between_train_steps": 0.00452423095703125, "step": 788} +{"info/global_step": 789, "train_info/time_within_train_step": 30.83605194091797, "step": 789} +{"train_info/time_between_train_steps": 0.004537105560302734, "step": 789} +{"info/global_step": 790, "train_info/time_within_train_step": 29.088077068328857, "step": 790} +{"train_info/time_between_train_steps": 0.004494667053222656, "step": 790} +{"info/global_step": 791, "train_info/time_within_train_step": 28.597967624664307, "step": 791} +{"train_info/time_between_train_steps": 0.005411863327026367, "step": 791} +{"info/global_step": 792, "train_info/time_within_train_step": 26.33359980583191, "step": 792} +{"train_info/time_between_train_steps": 0.0051038265228271484, "step": 792} +{"train_info/time_between_train_steps": 16.606305599212646, "step": 792} +{"info/global_step": 793, "train_info/time_within_train_step": 26.26816964149475, "step": 793} +{"train_info/time_between_train_steps": 0.003660440444946289, "step": 793} +{"info/global_step": 794, "train_info/time_within_train_step": 26.327685356140137, "step": 794} +{"train_info/time_between_train_steps": 0.0036842823028564453, "step": 794} +{"info/global_step": 795, "train_info/time_within_train_step": 26.269789457321167, "step": 795} +{"train_info/time_between_train_steps": 0.003660440444946289, "step": 795} +{"info/global_step": 796, "train_info/time_within_train_step": 26.39890718460083, "step": 796} +{"train_info/time_between_train_steps": 0.003871917724609375, "step": 796} +{"info/global_step": 797, "train_info/time_within_train_step": 26.266342163085938, "step": 797} +{"train_info/time_between_train_steps": 0.003775358200073242, "step": 797} +{"info/global_step": 798, "train_info/time_within_train_step": 26.353612661361694, "step": 798} +{"train_info/time_between_train_steps": 0.0039746761322021484, "step": 798} +{"info/global_step": 799, "train_info/time_within_train_step": 26.267961263656616, "step": 799} +{"train_info/time_between_train_steps": 0.003879547119140625, "step": 799} +{"info/global_step": 800, "train_info/time_within_train_step": 26.3480966091156, "step": 800} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18056.2958984375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740947505, "_runtime": 22011}, "step": 800} +{"logs": {"train/loss": 2.9065, "train/learning_rate": 0.00022222222222222218, "train/epoch": 22.01, "_timestamp": 1740947505, "_runtime": 22011}, "step": 800} +{"train_info/time_between_train_steps": 29.94892430305481, "step": 800} +{"info/global_step": 801, "train_info/time_within_train_step": 26.47144103050232, "step": 801} +{"train_info/time_between_train_steps": 0.004175424575805664, "step": 801} +{"info/global_step": 802, "train_info/time_within_train_step": 26.399080991744995, "step": 802} +{"train_info/time_between_train_steps": 0.0038590431213378906, "step": 802} +{"info/global_step": 803, "train_info/time_within_train_step": 26.300694465637207, "step": 803} +{"train_info/time_between_train_steps": 0.004017353057861328, "step": 803} +{"info/global_step": 804, "train_info/time_within_train_step": 26.348857641220093, "step": 804} +{"train_info/time_between_train_steps": 0.003981351852416992, "step": 804} +{"info/global_step": 805, "train_info/time_within_train_step": 26.257832288742065, "step": 805} +{"train_info/time_between_train_steps": 0.003923892974853516, "step": 805} +{"info/global_step": 806, "train_info/time_within_train_step": 26.355958938598633, "step": 806} +{"train_info/time_between_train_steps": 0.003912687301635742, "step": 806} +{"info/global_step": 807, "train_info/time_within_train_step": 26.28584575653076, "step": 807} +{"train_info/time_between_train_steps": 0.003908634185791016, "step": 807} +{"info/global_step": 808, "train_info/time_within_train_step": 26.378761291503906, "step": 808} +{"train_info/time_between_train_steps": 0.004043102264404297, "step": 808} +{"info/global_step": 809, "train_info/time_within_train_step": 26.28339123725891, "step": 809} +{"train_info/time_between_train_steps": 0.016525983810424805, "step": 809} +{"info/global_step": 810, "train_info/time_within_train_step": 26.288527011871338, "step": 810} +{"train_info/time_between_train_steps": 0.0038361549377441406, "step": 810} +{"info/global_step": 811, "train_info/time_within_train_step": 26.28990077972412, "step": 811} +{"train_info/time_between_train_steps": 0.0036911964416503906, "step": 811} +{"info/global_step": 812, "train_info/time_within_train_step": 26.292800664901733, "step": 812} +{"train_info/time_between_train_steps": 0.0037755966186523438, "step": 812} +{"info/global_step": 813, "train_info/time_within_train_step": 26.25166964530945, "step": 813} +{"train_info/time_between_train_steps": 0.0038459300994873047, "step": 813} +{"info/global_step": 814, "train_info/time_within_train_step": 26.337145805358887, "step": 814} +{"train_info/time_between_train_steps": 0.0038967132568359375, "step": 814} +{"info/global_step": 815, "train_info/time_within_train_step": 26.263012170791626, "step": 815} +{"train_info/time_between_train_steps": 0.0038700103759765625, "step": 815} +{"info/global_step": 816, "train_info/time_within_train_step": 26.299386739730835, "step": 816} +{"train_info/time_between_train_steps": 0.0038979053497314453, "step": 816} +{"info/global_step": 817, "train_info/time_within_train_step": 26.350825786590576, "step": 817} +{"train_info/time_between_train_steps": 0.004084348678588867, "step": 817} +{"info/global_step": 818, "train_info/time_within_train_step": 26.259752988815308, "step": 818} +{"train_info/time_between_train_steps": 0.0038557052612304688, "step": 818} +{"info/global_step": 819, "train_info/time_within_train_step": 26.2675724029541, "step": 819} +{"train_info/time_between_train_steps": 0.003838777542114258, "step": 819} +{"info/global_step": 820, "train_info/time_within_train_step": 26.282132625579834, "step": 820} +{"train_info/time_between_train_steps": 0.003835439682006836, "step": 820} +{"info/global_step": 821, "train_info/time_within_train_step": 26.27952003479004, "step": 821} +{"train_info/time_between_train_steps": 0.003949642181396484, "step": 821} +{"info/global_step": 822, "train_info/time_within_train_step": 26.265228986740112, "step": 822} +{"train_info/time_between_train_steps": 0.0040149688720703125, "step": 822} +{"info/global_step": 823, "train_info/time_within_train_step": 26.28528666496277, "step": 823} +{"train_info/time_between_train_steps": 0.0038344860076904297, "step": 823} +{"info/global_step": 824, "train_info/time_within_train_step": 26.266160249710083, "step": 824} +{"train_info/time_between_train_steps": 0.0039136409759521484, "step": 824} +{"info/global_step": 825, "train_info/time_within_train_step": 26.300284147262573, "step": 825} +{"train_info/time_between_train_steps": 0.0038597583770751953, "step": 825} +{"info/global_step": 826, "train_info/time_within_train_step": 26.27899742126465, "step": 826} +{"train_info/time_between_train_steps": 0.004121541976928711, "step": 826} +{"info/global_step": 827, "train_info/time_within_train_step": 26.301050424575806, "step": 827} +{"train_info/time_between_train_steps": 0.004076957702636719, "step": 827} +{"info/global_step": 828, "train_info/time_within_train_step": 26.301817178726196, "step": 828} +{"train_info/time_between_train_steps": 0.00453639030456543, "step": 828} +{"train_info/time_between_train_steps": 16.277201652526855, "step": 828} +{"info/global_step": 829, "train_info/time_within_train_step": 26.249916315078735, "step": 829} +{"train_info/time_between_train_steps": 0.0038115978240966797, "step": 829} +{"info/global_step": 830, "train_info/time_within_train_step": 26.3697988986969, "step": 830} +{"train_info/time_between_train_steps": 0.003947734832763672, "step": 830} +{"info/global_step": 831, "train_info/time_within_train_step": 26.26468014717102, "step": 831} +{"train_info/time_between_train_steps": 0.003954410552978516, "step": 831} +{"info/global_step": 832, "train_info/time_within_train_step": 26.40714430809021, "step": 832} +{"train_info/time_between_train_steps": 0.0039052963256835938, "step": 832} +{"info/global_step": 833, "train_info/time_within_train_step": 26.264116048812866, "step": 833} +{"train_info/time_between_train_steps": 0.003920793533325195, "step": 833} +{"info/global_step": 834, "train_info/time_within_train_step": 26.358869552612305, "step": 834} +{"train_info/time_between_train_steps": 0.0039882659912109375, "step": 834} +{"info/global_step": 835, "train_info/time_within_train_step": 26.273829698562622, "step": 835} +{"train_info/time_between_train_steps": 0.003910064697265625, "step": 835} +{"info/global_step": 836, "train_info/time_within_train_step": 26.358705759048462, "step": 836} +{"train_info/time_between_train_steps": 0.0039899349212646484, "step": 836} +{"info/global_step": 837, "train_info/time_within_train_step": 26.26307773590088, "step": 837} +{"train_info/time_between_train_steps": 0.0038671493530273438, "step": 837} +{"info/global_step": 838, "train_info/time_within_train_step": 26.37277364730835, "step": 838} +{"train_info/time_between_train_steps": 0.003973245620727539, "step": 838} +{"info/global_step": 839, "train_info/time_within_train_step": 26.286062240600586, "step": 839} +{"train_info/time_between_train_steps": 0.004019260406494141, "step": 839} +{"info/global_step": 840, "train_info/time_within_train_step": 26.34644889831543, "step": 840} +{"train_info/time_between_train_steps": 0.0038785934448242188, "step": 840} +{"info/global_step": 841, "train_info/time_within_train_step": 26.296891450881958, "step": 841} +{"train_info/time_between_train_steps": 0.003907918930053711, "step": 841} +{"info/global_step": 842, "train_info/time_within_train_step": 26.35056710243225, "step": 842} +{"train_info/time_between_train_steps": 0.00394749641418457, "step": 842} +{"info/global_step": 843, "train_info/time_within_train_step": 26.267361402511597, "step": 843} +{"train_info/time_between_train_steps": 0.003964662551879883, "step": 843} +{"info/global_step": 844, "train_info/time_within_train_step": 26.349563121795654, "step": 844} +{"train_info/time_between_train_steps": 0.004069328308105469, "step": 844} +{"info/global_step": 845, "train_info/time_within_train_step": 26.29397416114807, "step": 845} +{"train_info/time_between_train_steps": 0.014411687850952148, "step": 845} +{"info/global_step": 846, "train_info/time_within_train_step": 26.332334995269775, "step": 846} +{"train_info/time_between_train_steps": 0.0037229061126708984, "step": 846} +{"info/global_step": 847, "train_info/time_within_train_step": 26.33921194076538, "step": 847} +{"train_info/time_between_train_steps": 0.00382232666015625, "step": 847} +{"info/global_step": 848, "train_info/time_within_train_step": 26.251734256744385, "step": 848} +{"train_info/time_between_train_steps": 0.003718137741088867, "step": 848} +{"info/global_step": 849, "train_info/time_within_train_step": 26.274970769882202, "step": 849} +{"train_info/time_between_train_steps": 0.0037081241607666016, "step": 849} +{"info/global_step": 850, "train_info/time_within_train_step": 26.330687999725342, "step": 850} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18056.2958984375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740948869, "_runtime": 23375}, "step": 850} +{"logs": {"train/loss": 2.8338, "train/learning_rate": 0.00019444444444444443, "train/epoch": 23.02, "_timestamp": 1740948869, "_runtime": 23375}, "step": 850} +{"train_info/time_between_train_steps": 0.06052517890930176, "step": 850} +{"info/global_step": 851, "train_info/time_within_train_step": 26.269150257110596, "step": 851} +{"train_info/time_between_train_steps": 0.003731966018676758, "step": 851} +{"info/global_step": 852, "train_info/time_within_train_step": 26.259031057357788, "step": 852} +{"train_info/time_between_train_steps": 0.0037834644317626953, "step": 852} +{"info/global_step": 853, "train_info/time_within_train_step": 26.276447534561157, "step": 853} +{"train_info/time_between_train_steps": 0.0037593841552734375, "step": 853} +{"info/global_step": 854, "train_info/time_within_train_step": 26.251208066940308, "step": 854} +{"train_info/time_between_train_steps": 0.003792285919189453, "step": 854} +{"info/global_step": 855, "train_info/time_within_train_step": 26.26780414581299, "step": 855} +{"train_info/time_between_train_steps": 0.003712177276611328, "step": 855} +{"info/global_step": 856, "train_info/time_within_train_step": 26.278213500976562, "step": 856} +{"train_info/time_between_train_steps": 0.003760099411010742, "step": 856} +{"info/global_step": 857, "train_info/time_within_train_step": 26.306896448135376, "step": 857} +{"train_info/time_between_train_steps": 0.0037605762481689453, "step": 857} +{"info/global_step": 858, "train_info/time_within_train_step": 26.267725467681885, "step": 858} +{"train_info/time_between_train_steps": 0.0038034915924072266, "step": 858} +{"info/global_step": 859, "train_info/time_within_train_step": 26.314923524856567, "step": 859} +{"train_info/time_between_train_steps": 0.003712892532348633, "step": 859} +{"info/global_step": 860, "train_info/time_within_train_step": 26.274425745010376, "step": 860} +{"train_info/time_between_train_steps": 0.0037076473236083984, "step": 860} +{"info/global_step": 861, "train_info/time_within_train_step": 26.27918791770935, "step": 861} +{"train_info/time_between_train_steps": 0.0039021968841552734, "step": 861} +{"info/global_step": 862, "train_info/time_within_train_step": 26.296741247177124, "step": 862} +{"train_info/time_between_train_steps": 0.0038979053497314453, "step": 862} +{"info/global_step": 863, "train_info/time_within_train_step": 26.370797157287598, "step": 863} +{"train_info/time_between_train_steps": 0.00409388542175293, "step": 863} +{"info/global_step": 864, "train_info/time_within_train_step": 26.318805694580078, "step": 864} +{"train_info/time_between_train_steps": 0.004296064376831055, "step": 864} +{"train_info/time_between_train_steps": 16.366183757781982, "step": 864} +{"info/global_step": 865, "train_info/time_within_train_step": 26.291226387023926, "step": 865} +{"train_info/time_between_train_steps": 0.003783702850341797, "step": 865} +{"info/global_step": 866, "train_info/time_within_train_step": 26.406798124313354, "step": 866} +{"train_info/time_between_train_steps": 0.004152774810791016, "step": 866} +{"info/global_step": 867, "train_info/time_within_train_step": 26.298722505569458, "step": 867} +{"train_info/time_between_train_steps": 0.004155158996582031, "step": 867} +{"info/global_step": 868, "train_info/time_within_train_step": 26.430633306503296, "step": 868} +{"train_info/time_between_train_steps": 0.004078865051269531, "step": 868} +{"info/global_step": 869, "train_info/time_within_train_step": 26.28180718421936, "step": 869} +{"train_info/time_between_train_steps": 0.004042625427246094, "step": 869} +{"info/global_step": 870, "train_info/time_within_train_step": 26.38529372215271, "step": 870} +{"train_info/time_between_train_steps": 0.004087924957275391, "step": 870} +{"info/global_step": 871, "train_info/time_within_train_step": 26.421374559402466, "step": 871} +{"train_info/time_between_train_steps": 0.0043375492095947266, "step": 871} +{"info/global_step": 872, "train_info/time_within_train_step": 26.915485858917236, "step": 872} +{"train_info/time_between_train_steps": 0.0039441585540771484, "step": 872} +{"info/global_step": 873, "train_info/time_within_train_step": 26.263185262680054, "step": 873} +{"train_info/time_between_train_steps": 0.004090785980224609, "step": 873} +{"info/global_step": 874, "train_info/time_within_train_step": 27.059112071990967, "step": 874} +{"train_info/time_between_train_steps": 0.004290103912353516, "step": 874} +{"info/global_step": 875, "train_info/time_within_train_step": 27.05005645751953, "step": 875} +{"train_info/time_between_train_steps": 0.004112720489501953, "step": 875} +{"info/global_step": 876, "train_info/time_within_train_step": 26.374002933502197, "step": 876} +{"train_info/time_between_train_steps": 0.0040509700775146484, "step": 876} +{"info/global_step": 877, "train_info/time_within_train_step": 26.275654315948486, "step": 877} +{"train_info/time_between_train_steps": 0.004183292388916016, "step": 877} +{"info/global_step": 878, "train_info/time_within_train_step": 26.46567392349243, "step": 878} +{"train_info/time_between_train_steps": 0.004179477691650391, "step": 878} +{"info/global_step": 879, "train_info/time_within_train_step": 26.314146041870117, "step": 879} +{"train_info/time_between_train_steps": 0.0039403438568115234, "step": 879} +{"info/global_step": 880, "train_info/time_within_train_step": 26.323997497558594, "step": 880} +{"train_info/time_between_train_steps": 0.0043010711669921875, "step": 880} +{"info/global_step": 881, "train_info/time_within_train_step": 27.100568771362305, "step": 881} +{"train_info/time_between_train_steps": 0.014770746231079102, "step": 881} +{"info/global_step": 882, "train_info/time_within_train_step": 26.256505727767944, "step": 882} +{"train_info/time_between_train_steps": 0.003781557083129883, "step": 882} +{"info/global_step": 883, "train_info/time_within_train_step": 26.270450830459595, "step": 883} +{"train_info/time_between_train_steps": 0.003935813903808594, "step": 883} +{"info/global_step": 884, "train_info/time_within_train_step": 26.254895210266113, "step": 884} +{"train_info/time_between_train_steps": 0.003838062286376953, "step": 884} +{"info/global_step": 885, "train_info/time_within_train_step": 26.252756595611572, "step": 885} +{"train_info/time_between_train_steps": 0.0038497447967529297, "step": 885} +{"info/global_step": 886, "train_info/time_within_train_step": 26.26406478881836, "step": 886} +{"train_info/time_between_train_steps": 0.0038111209869384766, "step": 886} +{"info/global_step": 887, "train_info/time_within_train_step": 26.265434980392456, "step": 887} +{"train_info/time_between_train_steps": 0.0037870407104492188, "step": 887} +{"info/global_step": 888, "train_info/time_within_train_step": 26.27532434463501, "step": 888} +{"train_info/time_between_train_steps": 0.003972768783569336, "step": 888} +{"info/global_step": 889, "train_info/time_within_train_step": 26.260868549346924, "step": 889} +{"train_info/time_between_train_steps": 0.00392603874206543, "step": 889} +{"info/global_step": 890, "train_info/time_within_train_step": 26.293582439422607, "step": 890} +{"train_info/time_between_train_steps": 0.0038323402404785156, "step": 890} +{"info/global_step": 891, "train_info/time_within_train_step": 26.28931975364685, "step": 891} +{"train_info/time_between_train_steps": 0.0038619041442871094, "step": 891} +{"info/global_step": 892, "train_info/time_within_train_step": 26.285564661026, "step": 892} +{"train_info/time_between_train_steps": 0.003825664520263672, "step": 892} +{"info/global_step": 893, "train_info/time_within_train_step": 26.270601272583008, "step": 893} +{"train_info/time_between_train_steps": 0.0040090084075927734, "step": 893} +{"info/global_step": 894, "train_info/time_within_train_step": 26.584596633911133, "step": 894} +{"train_info/time_between_train_steps": 0.0039060115814208984, "step": 894} +{"info/global_step": 895, "train_info/time_within_train_step": 26.268284797668457, "step": 895} +{"train_info/time_between_train_steps": 0.003835916519165039, "step": 895} +{"info/global_step": 896, "train_info/time_within_train_step": 26.26884627342224, "step": 896} +{"train_info/time_between_train_steps": 0.0038309097290039062, "step": 896} +{"info/global_step": 897, "train_info/time_within_train_step": 26.34775471687317, "step": 897} +{"train_info/time_between_train_steps": 0.003905773162841797, "step": 897} +{"info/global_step": 898, "train_info/time_within_train_step": 26.274231910705566, "step": 898} +{"train_info/time_between_train_steps": 0.004227638244628906, "step": 898} +{"info/global_step": 899, "train_info/time_within_train_step": 45.70286798477173, "step": 899} +{"train_info/time_between_train_steps": 0.004399538040161133, "step": 899} +{"info/global_step": 900, "train_info/time_within_train_step": 28.636050939559937, "step": 900} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18056.2958984375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740950243, "_runtime": 24749}, "step": 900} +{"logs": {"train/loss": 2.7941, "train/learning_rate": 0.00016666666666666666, "train/epoch": 24.03, "_timestamp": 1740950243, "_runtime": 24749}, "step": 900} +{"train_info/time_between_train_steps": 126.94105172157288, "step": 900} +{"train_info/time_between_train_steps": 143.48236203193665, "step": 900} +{"info/global_step": 901, "train_info/time_within_train_step": 26.428534984588623, "step": 901} +{"train_info/time_between_train_steps": 0.003792285919189453, "step": 901} +{"info/global_step": 902, "train_info/time_within_train_step": 26.48217248916626, "step": 902} +{"train_info/time_between_train_steps": 0.0037975311279296875, "step": 902} +{"info/global_step": 903, "train_info/time_within_train_step": 26.406275033950806, "step": 903} +{"train_info/time_between_train_steps": 0.003896951675415039, "step": 903} +{"info/global_step": 904, "train_info/time_within_train_step": 26.97864580154419, "step": 904} +{"train_info/time_between_train_steps": 0.0038449764251708984, "step": 904} +{"info/global_step": 905, "train_info/time_within_train_step": 26.338940143585205, "step": 905} +{"train_info/time_between_train_steps": 0.004437446594238281, "step": 905} +{"info/global_step": 906, "train_info/time_within_train_step": 26.60388708114624, "step": 906} +{"train_info/time_between_train_steps": 0.0041201114654541016, "step": 906} +{"info/global_step": 907, "train_info/time_within_train_step": 26.28535032272339, "step": 907} +{"train_info/time_between_train_steps": 0.004294872283935547, "step": 907} +{"info/global_step": 908, "train_info/time_within_train_step": 26.400417804718018, "step": 908} +{"train_info/time_between_train_steps": 0.004037380218505859, "step": 908} +{"info/global_step": 909, "train_info/time_within_train_step": 26.35604238510132, "step": 909} +{"train_info/time_between_train_steps": 0.0043408870697021484, "step": 909} +{"info/global_step": 910, "train_info/time_within_train_step": 26.470711708068848, "step": 910} +{"train_info/time_between_train_steps": 0.004062652587890625, "step": 910} +{"info/global_step": 911, "train_info/time_within_train_step": 26.357715368270874, "step": 911} +{"train_info/time_between_train_steps": 0.004116058349609375, "step": 911} +{"info/global_step": 912, "train_info/time_within_train_step": 26.373028993606567, "step": 912} +{"train_info/time_between_train_steps": 0.004277706146240234, "step": 912} +{"info/global_step": 913, "train_info/time_within_train_step": 26.272377014160156, "step": 913} +{"train_info/time_between_train_steps": 0.004189014434814453, "step": 913} +{"info/global_step": 914, "train_info/time_within_train_step": 26.404969692230225, "step": 914} +{"train_info/time_between_train_steps": 0.004099607467651367, "step": 914} +{"info/global_step": 915, "train_info/time_within_train_step": 26.39298915863037, "step": 915} +{"train_info/time_between_train_steps": 0.003933906555175781, "step": 915} +{"info/global_step": 916, "train_info/time_within_train_step": 26.33531904220581, "step": 916} +{"train_info/time_between_train_steps": 0.004026174545288086, "step": 916} +{"info/global_step": 917, "train_info/time_within_train_step": 26.266841888427734, "step": 917} +{"train_info/time_between_train_steps": 0.02196049690246582, "step": 917} +{"info/global_step": 918, "train_info/time_within_train_step": 26.257744550704956, "step": 918} +{"train_info/time_between_train_steps": 0.0038099288940429688, "step": 918} +{"info/global_step": 919, "train_info/time_within_train_step": 26.271628856658936, "step": 919} +{"train_info/time_between_train_steps": 0.004044294357299805, "step": 919} +{"info/global_step": 920, "train_info/time_within_train_step": 26.271029710769653, "step": 920} +{"train_info/time_between_train_steps": 0.003851175308227539, "step": 920} +{"info/global_step": 921, "train_info/time_within_train_step": 26.26153802871704, "step": 921} +{"train_info/time_between_train_steps": 0.004431247711181641, "step": 921} +{"info/global_step": 922, "train_info/time_within_train_step": 26.310349702835083, "step": 922} +{"train_info/time_between_train_steps": 0.003938198089599609, "step": 922} +{"info/global_step": 923, "train_info/time_within_train_step": 26.266645908355713, "step": 923} +{"train_info/time_between_train_steps": 0.003958702087402344, "step": 923} +{"info/global_step": 924, "train_info/time_within_train_step": 26.338778257369995, "step": 924} +{"train_info/time_between_train_steps": 0.003874540328979492, "step": 924} +{"info/global_step": 925, "train_info/time_within_train_step": 26.261645317077637, "step": 925} +{"train_info/time_between_train_steps": 0.00413823127746582, "step": 925} +{"info/global_step": 926, "train_info/time_within_train_step": 26.25719451904297, "step": 926} +{"train_info/time_between_train_steps": 0.003881692886352539, "step": 926} +{"info/global_step": 927, "train_info/time_within_train_step": 26.268102407455444, "step": 927} +{"train_info/time_between_train_steps": 0.0039000511169433594, "step": 927} +{"info/global_step": 928, "train_info/time_within_train_step": 26.27139687538147, "step": 928} +{"train_info/time_between_train_steps": 0.003937721252441406, "step": 928} +{"info/global_step": 929, "train_info/time_within_train_step": 26.304595708847046, "step": 929} +{"train_info/time_between_train_steps": 0.0038247108459472656, "step": 929} +{"info/global_step": 930, "train_info/time_within_train_step": 26.266855239868164, "step": 930} +{"train_info/time_between_train_steps": 0.003847360610961914, "step": 930} +{"info/global_step": 931, "train_info/time_within_train_step": 26.306652069091797, "step": 931} +{"train_info/time_between_train_steps": 0.003875732421875, "step": 931} +{"info/global_step": 932, "train_info/time_within_train_step": 26.270967960357666, "step": 932} +{"train_info/time_between_train_steps": 0.003774404525756836, "step": 932} +{"info/global_step": 933, "train_info/time_within_train_step": 26.289881229400635, "step": 933} +{"train_info/time_between_train_steps": 0.003961324691772461, "step": 933} +{"info/global_step": 934, "train_info/time_within_train_step": 26.293593406677246, "step": 934} +{"train_info/time_between_train_steps": 0.0041615962982177734, "step": 934} +{"info/global_step": 935, "train_info/time_within_train_step": 26.291878700256348, "step": 935} +{"train_info/time_between_train_steps": 0.00433039665222168, "step": 935} +{"info/global_step": 936, "train_info/time_within_train_step": 26.3421630859375, "step": 936} +{"train_info/time_between_train_steps": 0.0048983097076416016, "step": 936} +{"train_info/time_between_train_steps": 16.72085452079773, "step": 936} +{"info/global_step": 937, "train_info/time_within_train_step": 26.295732498168945, "step": 937} +{"train_info/time_between_train_steps": 0.0036728382110595703, "step": 937} +{"info/global_step": 938, "train_info/time_within_train_step": 26.37545919418335, "step": 938} +{"train_info/time_between_train_steps": 0.003797769546508789, "step": 938} +{"info/global_step": 939, "train_info/time_within_train_step": 26.325336694717407, "step": 939} +{"train_info/time_between_train_steps": 0.00542902946472168, "step": 939} +{"info/global_step": 940, "train_info/time_within_train_step": 26.39062476158142, "step": 940} +{"train_info/time_between_train_steps": 0.003962516784667969, "step": 940} +{"info/global_step": 941, "train_info/time_within_train_step": 26.279120683670044, "step": 941} +{"train_info/time_between_train_steps": 0.004027843475341797, "step": 941} +{"info/global_step": 942, "train_info/time_within_train_step": 26.410251140594482, "step": 942} +{"train_info/time_between_train_steps": 0.003849029541015625, "step": 942} +{"info/global_step": 943, "train_info/time_within_train_step": 26.276339769363403, "step": 943} +{"train_info/time_between_train_steps": 0.0038878917694091797, "step": 943} +{"info/global_step": 944, "train_info/time_within_train_step": 26.397884368896484, "step": 944} +{"train_info/time_between_train_steps": 0.005878448486328125, "step": 944} +{"info/global_step": 945, "train_info/time_within_train_step": 26.275654554367065, "step": 945} +{"train_info/time_between_train_steps": 0.005903005599975586, "step": 945} +{"info/global_step": 946, "train_info/time_within_train_step": 26.362282514572144, "step": 946} +{"train_info/time_between_train_steps": 0.0037958621978759766, "step": 946} +{"info/global_step": 947, "train_info/time_within_train_step": 27.170952081680298, "step": 947} +{"train_info/time_between_train_steps": 0.003984928131103516, "step": 947} +{"info/global_step": 948, "train_info/time_within_train_step": 26.383862257003784, "step": 948} +{"train_info/time_between_train_steps": 0.00398707389831543, "step": 948} +{"info/global_step": 949, "train_info/time_within_train_step": 26.28371286392212, "step": 949} +{"train_info/time_between_train_steps": 0.004126071929931641, "step": 949} +{"info/global_step": 950, "train_info/time_within_train_step": 26.36414647102356, "step": 950} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18056.2958984375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740951727, "_runtime": 26233}, "step": 950} +{"logs": {"train/loss": 2.7915, "train/learning_rate": 0.0001388888888888889, "train/epoch": 26.01, "_timestamp": 1740951727, "_runtime": 26233}, "step": 950} +{"train_info/time_between_train_steps": 0.0249178409576416, "step": 950} +{"info/global_step": 951, "train_info/time_within_train_step": 26.273855447769165, "step": 951} +{"train_info/time_between_train_steps": 0.0038022994995117188, "step": 951} +{"info/global_step": 952, "train_info/time_within_train_step": 26.356960773468018, "step": 952} +{"train_info/time_between_train_steps": 0.004088640213012695, "step": 952} +{"info/global_step": 953, "train_info/time_within_train_step": 26.451388835906982, "step": 953} +{"train_info/time_between_train_steps": 0.018955230712890625, "step": 953} +{"info/global_step": 954, "train_info/time_within_train_step": 26.29809260368347, "step": 954} +{"train_info/time_between_train_steps": 0.0037276744842529297, "step": 954} +{"info/global_step": 955, "train_info/time_within_train_step": 26.307713985443115, "step": 955} +{"train_info/time_between_train_steps": 0.003755807876586914, "step": 955} +{"info/global_step": 956, "train_info/time_within_train_step": 26.280768156051636, "step": 956} +{"train_info/time_between_train_steps": 0.0036602020263671875, "step": 956} +{"info/global_step": 957, "train_info/time_within_train_step": 26.258760690689087, "step": 957} +{"train_info/time_between_train_steps": 0.0038270950317382812, "step": 957} +{"info/global_step": 958, "train_info/time_within_train_step": 26.917903900146484, "step": 958} +{"train_info/time_between_train_steps": 0.0038089752197265625, "step": 958} +{"info/global_step": 959, "train_info/time_within_train_step": 26.287259578704834, "step": 959} +{"train_info/time_between_train_steps": 0.0039424896240234375, "step": 959} +{"info/global_step": 960, "train_info/time_within_train_step": 26.2850923538208, "step": 960} +{"train_info/time_between_train_steps": 0.003988504409790039, "step": 960} +{"info/global_step": 961, "train_info/time_within_train_step": 26.259284496307373, "step": 961} +{"train_info/time_between_train_steps": 0.003926753997802734, "step": 961} +{"info/global_step": 962, "train_info/time_within_train_step": 26.258836030960083, "step": 962} +{"train_info/time_between_train_steps": 0.004362344741821289, "step": 962} +{"info/global_step": 963, "train_info/time_within_train_step": 26.268636226654053, "step": 963} +{"train_info/time_between_train_steps": 0.003817319869995117, "step": 963} +{"info/global_step": 964, "train_info/time_within_train_step": 26.271252870559692, "step": 964} +{"train_info/time_between_train_steps": 0.003862619400024414, "step": 964} +{"info/global_step": 965, "train_info/time_within_train_step": 26.28088927268982, "step": 965} +{"train_info/time_between_train_steps": 0.003828287124633789, "step": 965} +{"info/global_step": 966, "train_info/time_within_train_step": 26.26005220413208, "step": 966} +{"train_info/time_between_train_steps": 0.00383758544921875, "step": 966} +{"info/global_step": 967, "train_info/time_within_train_step": 26.30933690071106, "step": 967} +{"train_info/time_between_train_steps": 0.003905057907104492, "step": 967} +{"info/global_step": 968, "train_info/time_within_train_step": 26.26844096183777, "step": 968} +{"train_info/time_between_train_steps": 0.00409245491027832, "step": 968} +{"info/global_step": 969, "train_info/time_within_train_step": 27.119266271591187, "step": 969} +{"train_info/time_between_train_steps": 0.00415349006652832, "step": 969} +{"info/global_step": 970, "train_info/time_within_train_step": 26.506746292114258, "step": 970} +{"train_info/time_between_train_steps": 0.004239559173583984, "step": 970} +{"info/global_step": 971, "train_info/time_within_train_step": 26.8336021900177, "step": 971} +{"train_info/time_between_train_steps": 0.004599094390869141, "step": 971} +{"info/global_step": 972, "train_info/time_within_train_step": 27.11121392250061, "step": 972} +{"train_info/time_between_train_steps": 0.004954099655151367, "step": 972} +{"train_info/time_between_train_steps": 18.206936836242676, "step": 972} +{"info/global_step": 973, "train_info/time_within_train_step": 26.794671058654785, "step": 973} +{"train_info/time_between_train_steps": 0.0037126541137695312, "step": 973} +{"info/global_step": 974, "train_info/time_within_train_step": 26.391202688217163, "step": 974} +{"train_info/time_between_train_steps": 0.0037932395935058594, "step": 974} +{"info/global_step": 975, "train_info/time_within_train_step": 26.300745964050293, "step": 975} +{"train_info/time_between_train_steps": 0.003885507583618164, "step": 975} +{"info/global_step": 976, "train_info/time_within_train_step": 26.35406804084778, "step": 976} +{"train_info/time_between_train_steps": 0.003916740417480469, "step": 976} +{"info/global_step": 977, "train_info/time_within_train_step": 26.260773420333862, "step": 977} +{"train_info/time_between_train_steps": 0.003935337066650391, "step": 977} +{"info/global_step": 978, "train_info/time_within_train_step": 26.36851453781128, "step": 978} +{"train_info/time_between_train_steps": 0.0039141178131103516, "step": 978} +{"info/global_step": 979, "train_info/time_within_train_step": 26.258055210113525, "step": 979} +{"train_info/time_between_train_steps": 0.003999948501586914, "step": 979} +{"info/global_step": 980, "train_info/time_within_train_step": 26.994877815246582, "step": 980} +{"train_info/time_between_train_steps": 0.004036903381347656, "step": 980} +{"info/global_step": 981, "train_info/time_within_train_step": 26.267815351486206, "step": 981} +{"train_info/time_between_train_steps": 0.0040056705474853516, "step": 981} +{"info/global_step": 982, "train_info/time_within_train_step": 26.37145447731018, "step": 982} +{"train_info/time_between_train_steps": 0.003991842269897461, "step": 982} +{"info/global_step": 983, "train_info/time_within_train_step": 26.270459413528442, "step": 983} +{"train_info/time_between_train_steps": 0.003933906555175781, "step": 983} +{"info/global_step": 984, "train_info/time_within_train_step": 26.372223615646362, "step": 984} +{"train_info/time_between_train_steps": 0.003945589065551758, "step": 984} +{"info/global_step": 985, "train_info/time_within_train_step": 26.29231882095337, "step": 985} +{"train_info/time_between_train_steps": 0.004103660583496094, "step": 985} +{"info/global_step": 986, "train_info/time_within_train_step": 26.443803310394287, "step": 986} +{"train_info/time_between_train_steps": 0.003916740417480469, "step": 986} +{"info/global_step": 987, "train_info/time_within_train_step": 26.272801876068115, "step": 987} +{"train_info/time_between_train_steps": 0.0041315555572509766, "step": 987} +{"info/global_step": 988, "train_info/time_within_train_step": 26.336872100830078, "step": 988} +{"train_info/time_between_train_steps": 0.00397038459777832, "step": 988} +{"info/global_step": 989, "train_info/time_within_train_step": 26.278553009033203, "step": 989} +{"train_info/time_between_train_steps": 0.01461935043334961, "step": 989} +{"info/global_step": 990, "train_info/time_within_train_step": 26.270540237426758, "step": 990} +{"train_info/time_between_train_steps": 0.0037670135498046875, "step": 990} +{"info/global_step": 991, "train_info/time_within_train_step": 26.461801052093506, "step": 991} +{"train_info/time_between_train_steps": 0.003834962844848633, "step": 991} +{"info/global_step": 992, "train_info/time_within_train_step": 26.256428956985474, "step": 992} +{"train_info/time_between_train_steps": 0.003942728042602539, "step": 992} +{"info/global_step": 993, "train_info/time_within_train_step": 26.255524396896362, "step": 993} +{"train_info/time_between_train_steps": 0.003856182098388672, "step": 993} +{"info/global_step": 994, "train_info/time_within_train_step": 26.342925310134888, "step": 994} +{"train_info/time_between_train_steps": 0.003779888153076172, "step": 994} +{"info/global_step": 995, "train_info/time_within_train_step": 26.256799459457397, "step": 995} +{"train_info/time_between_train_steps": 0.0039539337158203125, "step": 995} +{"info/global_step": 996, "train_info/time_within_train_step": 26.30146098136902, "step": 996} +{"train_info/time_between_train_steps": 0.003909111022949219, "step": 996} +{"info/global_step": 997, "train_info/time_within_train_step": 26.271313428878784, "step": 997} +{"train_info/time_between_train_steps": 0.003991365432739258, "step": 997} +{"info/global_step": 998, "train_info/time_within_train_step": 26.25691866874695, "step": 998} +{"train_info/time_between_train_steps": 0.003932476043701172, "step": 998} +{"info/global_step": 999, "train_info/time_within_train_step": 26.267972469329834, "step": 999} +{"train_info/time_between_train_steps": 0.003911733627319336, "step": 999} +{"info/global_step": 1000, "train_info/time_within_train_step": 26.44426989555359, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18056.2958984375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740953068, "_runtime": 27574}, "step": 1000} +{"logs": {"train/loss": 2.7268, "train/learning_rate": 0.00011111111111111109, "train/epoch": 27.02, "_timestamp": 1740953068, "_runtime": 27574}, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18056.2958984375, "train_info/memory_reserved": 21960.0, "train_info/memory_max_reserved": 21960.0, "_timestamp": 1740953070, "_runtime": 27576}, "step": 1000} +{"logs": {"eval/loss": 3.4587721824645996, "eval/runtime": 2.4277, "eval/samples_per_second": 54.372, "eval/steps_per_second": 3.707, "train/epoch": 27.02, "_timestamp": 1740953070, "_runtime": 27576}, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18056.2958984375, "train_info/memory_reserved": 21960.0, "train_info/memory_max_reserved": 21960.0, "_timestamp": 1740953071, "_runtime": 27577}, "step": 1000} +{"logs": {"eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 3.4587721824645996, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 31.777935045784314, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 2.4277, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 54.372, "train/epoch": 27.02, "_timestamp": 1740953071, "_runtime": 27577}, "step": 1000} +{"train_info/time_between_train_steps": 32.67134666442871, "step": 1000} +{"info/global_step": 1001, "train_info/time_within_train_step": 26.7452073097229, "step": 1001} +{"train_info/time_between_train_steps": 0.0037267208099365234, "step": 1001} +{"info/global_step": 1002, "train_info/time_within_train_step": 26.28001093864441, "step": 1002} +{"train_info/time_between_train_steps": 0.0036814212799072266, "step": 1002} +{"info/global_step": 1003, "train_info/time_within_train_step": 26.263545751571655, "step": 1003} +{"train_info/time_between_train_steps": 0.003827810287475586, "step": 1003} +{"info/global_step": 1004, "train_info/time_within_train_step": 26.30487632751465, "step": 1004} +{"train_info/time_between_train_steps": 0.003901958465576172, "step": 1004} +{"info/global_step": 1005, "train_info/time_within_train_step": 26.28062653541565, "step": 1005} +{"train_info/time_between_train_steps": 0.0039784908294677734, "step": 1005} +{"info/global_step": 1006, "train_info/time_within_train_step": 26.276341915130615, "step": 1006} +{"train_info/time_between_train_steps": 0.004033803939819336, "step": 1006} +{"info/global_step": 1007, "train_info/time_within_train_step": 26.268304347991943, "step": 1007} +{"train_info/time_between_train_steps": 0.004503965377807617, "step": 1007} +{"info/global_step": 1008, "train_info/time_within_train_step": 26.34157419204712, "step": 1008} +{"train_info/time_between_train_steps": 0.004694938659667969, "step": 1008} +{"train_info/time_between_train_steps": 16.393932819366455, "step": 1008} +{"info/global_step": 1009, "train_info/time_within_train_step": 26.253634452819824, "step": 1009} +{"train_info/time_between_train_steps": 0.003754854202270508, "step": 1009} +{"info/global_step": 1010, "train_info/time_within_train_step": 26.342570543289185, "step": 1010} +{"train_info/time_between_train_steps": 0.003803253173828125, "step": 1010} +{"info/global_step": 1011, "train_info/time_within_train_step": 26.39340353012085, "step": 1011} +{"train_info/time_between_train_steps": 0.003909111022949219, "step": 1011} +{"info/global_step": 1012, "train_info/time_within_train_step": 26.348170042037964, "step": 1012} +{"train_info/time_between_train_steps": 0.0037741661071777344, "step": 1012} +{"info/global_step": 1013, "train_info/time_within_train_step": 26.26046371459961, "step": 1013} +{"train_info/time_between_train_steps": 0.003951549530029297, "step": 1013} +{"info/global_step": 1014, "train_info/time_within_train_step": 26.355597496032715, "step": 1014} +{"train_info/time_between_train_steps": 0.003960609436035156, "step": 1014} +{"info/global_step": 1015, "train_info/time_within_train_step": 26.260579109191895, "step": 1015} +{"train_info/time_between_train_steps": 0.0039119720458984375, "step": 1015} +{"info/global_step": 1016, "train_info/time_within_train_step": 26.427610635757446, "step": 1016} +{"train_info/time_between_train_steps": 0.003932952880859375, "step": 1016} +{"info/global_step": 1017, "train_info/time_within_train_step": 26.30376935005188, "step": 1017} +{"train_info/time_between_train_steps": 0.003930807113647461, "step": 1017} +{"info/global_step": 1018, "train_info/time_within_train_step": 26.341842889785767, "step": 1018} +{"train_info/time_between_train_steps": 0.003874540328979492, "step": 1018} +{"info/global_step": 1019, "train_info/time_within_train_step": 26.26887059211731, "step": 1019} +{"train_info/time_between_train_steps": 0.0038297176361083984, "step": 1019} +{"info/global_step": 1020, "train_info/time_within_train_step": 26.346895217895508, "step": 1020} +{"train_info/time_between_train_steps": 0.003869295120239258, "step": 1020} +{"info/global_step": 1021, "train_info/time_within_train_step": 26.292529344558716, "step": 1021} +{"train_info/time_between_train_steps": 0.0042035579681396484, "step": 1021} +{"info/global_step": 1022, "train_info/time_within_train_step": 26.919132709503174, "step": 1022} +{"train_info/time_between_train_steps": 0.004097938537597656, "step": 1022} +{"info/global_step": 1023, "train_info/time_within_train_step": 28.309587001800537, "step": 1023} +{"train_info/time_between_train_steps": 0.0039424896240234375, "step": 1023} +{"info/global_step": 1024, "train_info/time_within_train_step": 26.353620767593384, "step": 1024} +{"train_info/time_between_train_steps": 0.004132747650146484, "step": 1024} +{"info/global_step": 1025, "train_info/time_within_train_step": 26.290180206298828, "step": 1025} +{"train_info/time_between_train_steps": 0.01453399658203125, "step": 1025} +{"info/global_step": 1026, "train_info/time_within_train_step": 26.30088472366333, "step": 1026} +{"train_info/time_between_train_steps": 0.0037708282470703125, "step": 1026} +{"info/global_step": 1027, "train_info/time_within_train_step": 26.256555557250977, "step": 1027} +{"train_info/time_between_train_steps": 0.0037903785705566406, "step": 1027} +{"info/global_step": 1028, "train_info/time_within_train_step": 26.328312635421753, "step": 1028} +{"train_info/time_between_train_steps": 0.003915548324584961, "step": 1028} +{"info/global_step": 1029, "train_info/time_within_train_step": 26.264345407485962, "step": 1029} +{"train_info/time_between_train_steps": 0.003802061080932617, "step": 1029} +{"info/global_step": 1030, "train_info/time_within_train_step": 26.315943479537964, "step": 1030} +{"train_info/time_between_train_steps": 0.003815174102783203, "step": 1030} +{"info/global_step": 1031, "train_info/time_within_train_step": 26.27959394454956, "step": 1031} +{"train_info/time_between_train_steps": 0.0038003921508789062, "step": 1031} +{"info/global_step": 1032, "train_info/time_within_train_step": 26.35531735420227, "step": 1032} +{"train_info/time_between_train_steps": 0.003916740417480469, "step": 1032} +{"info/global_step": 1033, "train_info/time_within_train_step": 26.308634519577026, "step": 1033} +{"train_info/time_between_train_steps": 0.003933429718017578, "step": 1033} +{"info/global_step": 1034, "train_info/time_within_train_step": 26.565508365631104, "step": 1034} +{"train_info/time_between_train_steps": 0.0038199424743652344, "step": 1034} +{"info/global_step": 1035, "train_info/time_within_train_step": 26.256641387939453, "step": 1035} +{"train_info/time_between_train_steps": 0.003876924514770508, "step": 1035} +{"info/global_step": 1036, "train_info/time_within_train_step": 26.267035245895386, "step": 1036} +{"train_info/time_between_train_steps": 0.0038385391235351562, "step": 1036} +{"info/global_step": 1037, "train_info/time_within_train_step": 26.253657341003418, "step": 1037} +{"train_info/time_between_train_steps": 0.003838777542114258, "step": 1037} +{"info/global_step": 1038, "train_info/time_within_train_step": 26.256944179534912, "step": 1038} +{"train_info/time_between_train_steps": 0.0038602352142333984, "step": 1038} +{"info/global_step": 1039, "train_info/time_within_train_step": 26.27955436706543, "step": 1039} +{"train_info/time_between_train_steps": 0.003952503204345703, "step": 1039} +{"info/global_step": 1040, "train_info/time_within_train_step": 26.270614624023438, "step": 1040} +{"train_info/time_between_train_steps": 0.00397491455078125, "step": 1040} +{"info/global_step": 1041, "train_info/time_within_train_step": 26.277733325958252, "step": 1041} +{"train_info/time_between_train_steps": 0.003979682922363281, "step": 1041} +{"info/global_step": 1042, "train_info/time_within_train_step": 26.310348510742188, "step": 1042} +{"train_info/time_between_train_steps": 0.004108428955078125, "step": 1042} +{"info/global_step": 1043, "train_info/time_within_train_step": 26.274713277816772, "step": 1043} +{"train_info/time_between_train_steps": 0.004450798034667969, "step": 1043} +{"info/global_step": 1044, "train_info/time_within_train_step": 26.36578369140625, "step": 1044} +{"train_info/time_between_train_steps": 0.006640434265136719, "step": 1044} +{"train_info/time_between_train_steps": 16.541450262069702, "step": 1044} +{"info/global_step": 1045, "train_info/time_within_train_step": 29.069862127304077, "step": 1045} +{"train_info/time_between_train_steps": 0.0038204193115234375, "step": 1045} +{"info/global_step": 1046, "train_info/time_within_train_step": 27.11939311027527, "step": 1046} +{"train_info/time_between_train_steps": 0.003707408905029297, "step": 1046} +{"info/global_step": 1047, "train_info/time_within_train_step": 26.31198740005493, "step": 1047} +{"train_info/time_between_train_steps": 0.003736257553100586, "step": 1047} +{"info/global_step": 1048, "train_info/time_within_train_step": 26.356603622436523, "step": 1048} +{"train_info/time_between_train_steps": 0.003750324249267578, "step": 1048} +{"info/global_step": 1049, "train_info/time_within_train_step": 26.250345945358276, "step": 1049} +{"train_info/time_between_train_steps": 0.0037822723388671875, "step": 1049} +{"info/global_step": 1050, "train_info/time_within_train_step": 26.412365674972534, "step": 1050} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18056.2958984375, "train_info/memory_reserved": 21960.0, "train_info/memory_max_reserved": 21960.0, "_timestamp": 1740954461, "_runtime": 28967}, "step": 1050} +{"logs": {"train/loss": 2.7262, "train/learning_rate": 8.333333333333333e-05, "train/epoch": 29.0, "_timestamp": 1740954461, "_runtime": 28967}, "step": 1050} +{"train_info/time_between_train_steps": 0.14145636558532715, "step": 1050} +{"info/global_step": 1051, "train_info/time_within_train_step": 26.256557941436768, "step": 1051} +{"train_info/time_between_train_steps": 0.0038700103759765625, "step": 1051} +{"info/global_step": 1052, "train_info/time_within_train_step": 26.327175617218018, "step": 1052} +{"train_info/time_between_train_steps": 0.003896951675415039, "step": 1052} +{"info/global_step": 1053, "train_info/time_within_train_step": 26.268771648406982, "step": 1053} +{"train_info/time_between_train_steps": 0.0040934085845947266, "step": 1053} +{"info/global_step": 1054, "train_info/time_within_train_step": 26.350714683532715, "step": 1054} +{"train_info/time_between_train_steps": 0.003911018371582031, "step": 1054} +{"info/global_step": 1055, "train_info/time_within_train_step": 26.244731187820435, "step": 1055} +{"train_info/time_between_train_steps": 0.003768444061279297, "step": 1055} +{"info/global_step": 1056, "train_info/time_within_train_step": 26.514808416366577, "step": 1056} +{"train_info/time_between_train_steps": 0.0038900375366210938, "step": 1056} +{"info/global_step": 1057, "train_info/time_within_train_step": 26.26222848892212, "step": 1057} +{"train_info/time_between_train_steps": 0.004039287567138672, "step": 1057} +{"info/global_step": 1058, "train_info/time_within_train_step": 26.36827564239502, "step": 1058} +{"train_info/time_between_train_steps": 0.0041005611419677734, "step": 1058} +{"info/global_step": 1059, "train_info/time_within_train_step": 26.28714370727539, "step": 1059} +{"train_info/time_between_train_steps": 0.003951072692871094, "step": 1059} +{"info/global_step": 1060, "train_info/time_within_train_step": 26.34393835067749, "step": 1060} +{"train_info/time_between_train_steps": 0.004033327102661133, "step": 1060} +{"info/global_step": 1061, "train_info/time_within_train_step": 26.303345918655396, "step": 1061} +{"train_info/time_between_train_steps": 0.013933420181274414, "step": 1061} +{"info/global_step": 1062, "train_info/time_within_train_step": 26.26438546180725, "step": 1062} +{"train_info/time_between_train_steps": 0.003825664520263672, "step": 1062} +{"info/global_step": 1063, "train_info/time_within_train_step": 26.64761972427368, "step": 1063} +{"train_info/time_between_train_steps": 0.003991365432739258, "step": 1063} +{"info/global_step": 1064, "train_info/time_within_train_step": 27.062249660491943, "step": 1064} +{"train_info/time_between_train_steps": 0.0037975311279296875, "step": 1064} +{"info/global_step": 1065, "train_info/time_within_train_step": 26.272651433944702, "step": 1065} +{"train_info/time_between_train_steps": 0.0038695335388183594, "step": 1065} +{"info/global_step": 1066, "train_info/time_within_train_step": 26.275906085968018, "step": 1066} +{"train_info/time_between_train_steps": 0.0037987232208251953, "step": 1066} +{"info/global_step": 1067, "train_info/time_within_train_step": 26.273192405700684, "step": 1067} +{"train_info/time_between_train_steps": 0.0038797855377197266, "step": 1067} +{"info/global_step": 1068, "train_info/time_within_train_step": 26.29048776626587, "step": 1068} +{"train_info/time_between_train_steps": 0.003916263580322266, "step": 1068} +{"info/global_step": 1069, "train_info/time_within_train_step": 26.265092134475708, "step": 1069} +{"train_info/time_between_train_steps": 0.004094123840332031, "step": 1069} +{"info/global_step": 1070, "train_info/time_within_train_step": 26.288703203201294, "step": 1070} +{"train_info/time_between_train_steps": 0.0038170814514160156, "step": 1070} +{"info/global_step": 1071, "train_info/time_within_train_step": 26.27777671813965, "step": 1071} +{"train_info/time_between_train_steps": 0.0037796497344970703, "step": 1071} +{"info/global_step": 1072, "train_info/time_within_train_step": 26.277568578720093, "step": 1072} +{"train_info/time_between_train_steps": 0.0038416385650634766, "step": 1072} +{"info/global_step": 1073, "train_info/time_within_train_step": 27.062204837799072, "step": 1073} +{"train_info/time_between_train_steps": 0.004112958908081055, "step": 1073} +{"info/global_step": 1074, "train_info/time_within_train_step": 26.658689737319946, "step": 1074} +{"train_info/time_between_train_steps": 0.003833293914794922, "step": 1074} +{"info/global_step": 1075, "train_info/time_within_train_step": 26.286384344100952, "step": 1075} +{"train_info/time_between_train_steps": 0.0038352012634277344, "step": 1075} +{"info/global_step": 1076, "train_info/time_within_train_step": 26.2854585647583, "step": 1076} +{"train_info/time_between_train_steps": 0.003900766372680664, "step": 1076} +{"info/global_step": 1077, "train_info/time_within_train_step": 26.295382738113403, "step": 1077} +{"train_info/time_between_train_steps": 0.003877878189086914, "step": 1077} +{"info/global_step": 1078, "train_info/time_within_train_step": 26.34588360786438, "step": 1078} +{"train_info/time_between_train_steps": 0.0039789676666259766, "step": 1078} +{"info/global_step": 1079, "train_info/time_within_train_step": 26.331080198287964, "step": 1079} +{"train_info/time_between_train_steps": 0.006594419479370117, "step": 1079} +{"info/global_step": 1080, "train_info/time_within_train_step": 26.3663547039032, "step": 1080} +{"train_info/time_between_train_steps": 0.004647016525268555, "step": 1080} +{"train_info/time_between_train_steps": 16.304420948028564, "step": 1080} +{"info/global_step": 1081, "train_info/time_within_train_step": 26.26019525527954, "step": 1081} +{"train_info/time_between_train_steps": 0.0036890506744384766, "step": 1081} +{"info/global_step": 1082, "train_info/time_within_train_step": 26.325485944747925, "step": 1082} +{"train_info/time_between_train_steps": 0.003715038299560547, "step": 1082} +{"info/global_step": 1083, "train_info/time_within_train_step": 26.261379718780518, "step": 1083} +{"train_info/time_between_train_steps": 0.003750324249267578, "step": 1083} +{"info/global_step": 1084, "train_info/time_within_train_step": 26.34180974960327, "step": 1084} +{"train_info/time_between_train_steps": 0.0037746429443359375, "step": 1084} +{"info/global_step": 1085, "train_info/time_within_train_step": 26.2617130279541, "step": 1085} +{"train_info/time_between_train_steps": 0.003805875778198242, "step": 1085} +{"info/global_step": 1086, "train_info/time_within_train_step": 26.37246608734131, "step": 1086} +{"train_info/time_between_train_steps": 0.003796815872192383, "step": 1086} +{"info/global_step": 1087, "train_info/time_within_train_step": 26.267517566680908, "step": 1087} +{"train_info/time_between_train_steps": 0.004038572311401367, "step": 1087} +{"info/global_step": 1088, "train_info/time_within_train_step": 26.95711326599121, "step": 1088} +{"train_info/time_between_train_steps": 0.004153013229370117, "step": 1088} +{"info/global_step": 1089, "train_info/time_within_train_step": 26.28251314163208, "step": 1089} +{"train_info/time_between_train_steps": 0.003907918930053711, "step": 1089} +{"info/global_step": 1090, "train_info/time_within_train_step": 26.358297109603882, "step": 1090} +{"train_info/time_between_train_steps": 0.0038771629333496094, "step": 1090} +{"info/global_step": 1091, "train_info/time_within_train_step": 26.274163961410522, "step": 1091} +{"train_info/time_between_train_steps": 0.003912448883056641, "step": 1091} +{"info/global_step": 1092, "train_info/time_within_train_step": 26.369800329208374, "step": 1092} +{"train_info/time_between_train_steps": 0.004049777984619141, "step": 1092} +{"info/global_step": 1093, "train_info/time_within_train_step": 26.352267503738403, "step": 1093} +{"train_info/time_between_train_steps": 0.00395965576171875, "step": 1093} +{"info/global_step": 1094, "train_info/time_within_train_step": 26.35683822631836, "step": 1094} +{"train_info/time_between_train_steps": 0.003895998001098633, "step": 1094} +{"info/global_step": 1095, "train_info/time_within_train_step": 26.26151180267334, "step": 1095} +{"train_info/time_between_train_steps": 0.003953456878662109, "step": 1095} +{"info/global_step": 1096, "train_info/time_within_train_step": 26.34864640235901, "step": 1096} +{"train_info/time_between_train_steps": 0.003982067108154297, "step": 1096} +{"info/global_step": 1097, "train_info/time_within_train_step": 26.27839970588684, "step": 1097} +{"train_info/time_between_train_steps": 0.016366004943847656, "step": 1097} +{"info/global_step": 1098, "train_info/time_within_train_step": 26.25509786605835, "step": 1098} +{"train_info/time_between_train_steps": 0.0037431716918945312, "step": 1098} +{"info/global_step": 1099, "train_info/time_within_train_step": 26.673247575759888, "step": 1099} +{"train_info/time_between_train_steps": 0.003763914108276367, "step": 1099} +{"info/global_step": 1100, "train_info/time_within_train_step": 26.282026052474976, "step": 1100} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18056.2958984375, "train_info/memory_reserved": 21960.0, "train_info/memory_max_reserved": 21960.0, "_timestamp": 1740955799, "_runtime": 30305}, "step": 1100} +{"logs": {"train/loss": 2.6701, "train/learning_rate": 5.5555555555555545e-05, "train/epoch": 30.02, "_timestamp": 1740955799, "_runtime": 30305}, "step": 1100} +{"train_info/time_between_train_steps": 30.590426921844482, "step": 1100} +{"info/global_step": 1101, "train_info/time_within_train_step": 26.442368507385254, "step": 1101} +{"train_info/time_between_train_steps": 0.004277229309082031, "step": 1101} +{"info/global_step": 1102, "train_info/time_within_train_step": 26.36003065109253, "step": 1102} +{"train_info/time_between_train_steps": 0.003956794738769531, "step": 1102} +{"info/global_step": 1103, "train_info/time_within_train_step": 26.338602542877197, "step": 1103} +{"train_info/time_between_train_steps": 0.004057407379150391, "step": 1103} +{"info/global_step": 1104, "train_info/time_within_train_step": 26.28592300415039, "step": 1104} +{"train_info/time_between_train_steps": 0.003961801528930664, "step": 1104} +{"info/global_step": 1105, "train_info/time_within_train_step": 26.324081659317017, "step": 1105} +{"train_info/time_between_train_steps": 0.004119157791137695, "step": 1105} +{"info/global_step": 1106, "train_info/time_within_train_step": 26.286665201187134, "step": 1106} +{"train_info/time_between_train_steps": 0.004118919372558594, "step": 1106} +{"info/global_step": 1107, "train_info/time_within_train_step": 26.3188533782959, "step": 1107} +{"train_info/time_between_train_steps": 0.003915548324584961, "step": 1107} +{"info/global_step": 1108, "train_info/time_within_train_step": 26.271191596984863, "step": 1108} +{"train_info/time_between_train_steps": 0.004011392593383789, "step": 1108} +{"info/global_step": 1109, "train_info/time_within_train_step": 26.493072748184204, "step": 1109} +{"train_info/time_between_train_steps": 0.003908634185791016, "step": 1109} +{"info/global_step": 1110, "train_info/time_within_train_step": 26.27524757385254, "step": 1110} +{"train_info/time_between_train_steps": 0.003965854644775391, "step": 1110} +{"info/global_step": 1111, "train_info/time_within_train_step": 26.281041860580444, "step": 1111} +{"train_info/time_between_train_steps": 0.004221916198730469, "step": 1111} +{"info/global_step": 1112, "train_info/time_within_train_step": 26.264901161193848, "step": 1112} +{"train_info/time_between_train_steps": 0.003942966461181641, "step": 1112} +{"info/global_step": 1113, "train_info/time_within_train_step": 26.26663613319397, "step": 1113} +{"train_info/time_between_train_steps": 0.003891468048095703, "step": 1113} +{"info/global_step": 1114, "train_info/time_within_train_step": 26.287928342819214, "step": 1114} +{"train_info/time_between_train_steps": 0.004150867462158203, "step": 1114} +{"info/global_step": 1115, "train_info/time_within_train_step": 26.292761087417603, "step": 1115} +{"train_info/time_between_train_steps": 0.0045735836029052734, "step": 1115} +{"info/global_step": 1116, "train_info/time_within_train_step": 26.369067907333374, "step": 1116} +{"train_info/time_between_train_steps": 0.0045318603515625, "step": 1116} +{"train_info/time_between_train_steps": 16.413825035095215, "step": 1116} +{"info/global_step": 1117, "train_info/time_within_train_step": 26.257237195968628, "step": 1117} +{"train_info/time_between_train_steps": 0.0037424564361572266, "step": 1117} +{"info/global_step": 1118, "train_info/time_within_train_step": 26.357471227645874, "step": 1118} +{"train_info/time_between_train_steps": 0.003719329833984375, "step": 1118} +{"info/global_step": 1119, "train_info/time_within_train_step": 26.245108127593994, "step": 1119} +{"train_info/time_between_train_steps": 0.003816843032836914, "step": 1119} +{"info/global_step": 1120, "train_info/time_within_train_step": 26.36634135246277, "step": 1120} +{"train_info/time_between_train_steps": 0.003738880157470703, "step": 1120} +{"info/global_step": 1121, "train_info/time_within_train_step": 26.26868462562561, "step": 1121} +{"train_info/time_between_train_steps": 0.0037009716033935547, "step": 1121} +{"info/global_step": 1122, "train_info/time_within_train_step": 26.364776849746704, "step": 1122} +{"train_info/time_between_train_steps": 0.003928422927856445, "step": 1122} +{"info/global_step": 1123, "train_info/time_within_train_step": 26.291624784469604, "step": 1123} +{"train_info/time_between_train_steps": 0.004008293151855469, "step": 1123} +{"info/global_step": 1124, "train_info/time_within_train_step": 26.460681200027466, "step": 1124} +{"train_info/time_between_train_steps": 0.003921031951904297, "step": 1124} +{"info/global_step": 1125, "train_info/time_within_train_step": 26.275965929031372, "step": 1125} +{"train_info/time_between_train_steps": 0.0038444995880126953, "step": 1125} +{"info/global_step": 1126, "train_info/time_within_train_step": 26.370580673217773, "step": 1126} +{"train_info/time_between_train_steps": 0.003900766372680664, "step": 1126} +{"info/global_step": 1127, "train_info/time_within_train_step": 26.294041633605957, "step": 1127} +{"train_info/time_between_train_steps": 0.003962039947509766, "step": 1127} +{"info/global_step": 1128, "train_info/time_within_train_step": 26.372486352920532, "step": 1128} +{"train_info/time_between_train_steps": 0.0040209293365478516, "step": 1128} +{"info/global_step": 1129, "train_info/time_within_train_step": 26.27283000946045, "step": 1129} +{"train_info/time_between_train_steps": 0.003937244415283203, "step": 1129} +{"info/global_step": 1130, "train_info/time_within_train_step": 26.368844270706177, "step": 1130} +{"train_info/time_between_train_steps": 0.004074811935424805, "step": 1130} +{"info/global_step": 1131, "train_info/time_within_train_step": 26.27733087539673, "step": 1131} +{"train_info/time_between_train_steps": 0.0038979053497314453, "step": 1131} +{"info/global_step": 1132, "train_info/time_within_train_step": 26.350786924362183, "step": 1132} +{"train_info/time_between_train_steps": 0.004116535186767578, "step": 1132} +{"info/global_step": 1133, "train_info/time_within_train_step": 26.278088331222534, "step": 1133} +{"train_info/time_between_train_steps": 0.013945579528808594, "step": 1133} +{"info/global_step": 1134, "train_info/time_within_train_step": 26.25589418411255, "step": 1134} +{"train_info/time_between_train_steps": 0.0037276744842529297, "step": 1134} +{"info/global_step": 1135, "train_info/time_within_train_step": 26.255332231521606, "step": 1135} +{"train_info/time_between_train_steps": 0.003756999969482422, "step": 1135} +{"info/global_step": 1136, "train_info/time_within_train_step": 26.31064534187317, "step": 1136} +{"train_info/time_between_train_steps": 0.0037758350372314453, "step": 1136} +{"info/global_step": 1137, "train_info/time_within_train_step": 26.267761945724487, "step": 1137} +{"train_info/time_between_train_steps": 0.0038590431213378906, "step": 1137} +{"info/global_step": 1138, "train_info/time_within_train_step": 26.291895866394043, "step": 1138} +{"train_info/time_between_train_steps": 0.003833770751953125, "step": 1138} +{"info/global_step": 1139, "train_info/time_within_train_step": 26.292378187179565, "step": 1139} +{"train_info/time_between_train_steps": 0.004024505615234375, "step": 1139} +{"info/global_step": 1140, "train_info/time_within_train_step": 26.3356876373291, "step": 1140} +{"train_info/time_between_train_steps": 0.0039255619049072266, "step": 1140} +{"info/global_step": 1141, "train_info/time_within_train_step": 26.562273025512695, "step": 1141} +{"train_info/time_between_train_steps": 0.0038557052612304688, "step": 1141} +{"info/global_step": 1142, "train_info/time_within_train_step": 26.259376764297485, "step": 1142} +{"train_info/time_between_train_steps": 0.003858804702758789, "step": 1142} +{"info/global_step": 1143, "train_info/time_within_train_step": 26.27237033843994, "step": 1143} +{"train_info/time_between_train_steps": 0.0041921138763427734, "step": 1143} +{"info/global_step": 1144, "train_info/time_within_train_step": 26.28261947631836, "step": 1144} +{"train_info/time_between_train_steps": 0.0039031505584716797, "step": 1144} +{"info/global_step": 1145, "train_info/time_within_train_step": 26.270026683807373, "step": 1145} +{"train_info/time_between_train_steps": 0.003888845443725586, "step": 1145} +{"info/global_step": 1146, "train_info/time_within_train_step": 26.290667057037354, "step": 1146} +{"train_info/time_between_train_steps": 0.003846883773803711, "step": 1146} +{"info/global_step": 1147, "train_info/time_within_train_step": 26.272127389907837, "step": 1147} +{"train_info/time_between_train_steps": 0.003824472427368164, "step": 1147} +{"info/global_step": 1148, "train_info/time_within_train_step": 26.298158645629883, "step": 1148} +{"train_info/time_between_train_steps": 0.0038628578186035156, "step": 1148} +{"info/global_step": 1149, "train_info/time_within_train_step": 26.281367301940918, "step": 1149} +{"train_info/time_between_train_steps": 0.004042625427246094, "step": 1149} +{"info/global_step": 1150, "train_info/time_within_train_step": 26.311747312545776, "step": 1150} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18056.2958984375, "train_info/memory_reserved": 21960.0, "train_info/memory_max_reserved": 21960.0, "_timestamp": 1740957163, "_runtime": 31669}, "step": 1150} +{"logs": {"train/loss": 2.6452, "train/learning_rate": 2.7777777777777772e-05, "train/epoch": 31.03, "_timestamp": 1740957163, "_runtime": 31669}, "step": 1150} +{"train_info/time_between_train_steps": 0.05220818519592285, "step": 1150} +{"info/global_step": 1151, "train_info/time_within_train_step": 26.294676780700684, "step": 1151} +{"train_info/time_between_train_steps": 0.004453182220458984, "step": 1151} +{"info/global_step": 1152, "train_info/time_within_train_step": 26.39612340927124, "step": 1152} +{"train_info/time_between_train_steps": 0.0047321319580078125, "step": 1152} +{"train_info/time_between_train_steps": 16.472058534622192, "step": 1152} +{"info/global_step": 1153, "train_info/time_within_train_step": 26.291861057281494, "step": 1153} +{"train_info/time_between_train_steps": 0.0037021636962890625, "step": 1153} +{"info/global_step": 1154, "train_info/time_within_train_step": 26.398526668548584, "step": 1154} +{"train_info/time_between_train_steps": 0.003914356231689453, "step": 1154} +{"info/global_step": 1155, "train_info/time_within_train_step": 26.327687740325928, "step": 1155} +{"train_info/time_between_train_steps": 0.003886699676513672, "step": 1155} +{"info/global_step": 1156, "train_info/time_within_train_step": 26.356858253479004, "step": 1156} +{"train_info/time_between_train_steps": 0.003929853439331055, "step": 1156} +{"info/global_step": 1157, "train_info/time_within_train_step": 26.26586890220642, "step": 1157} +{"train_info/time_between_train_steps": 0.003965616226196289, "step": 1157} +{"info/global_step": 1158, "train_info/time_within_train_step": 26.357666730880737, "step": 1158} +{"train_info/time_between_train_steps": 0.003979206085205078, "step": 1158} +{"info/global_step": 1159, "train_info/time_within_train_step": 26.34265971183777, "step": 1159} +{"train_info/time_between_train_steps": 0.003857135772705078, "step": 1159} +{"info/global_step": 1160, "train_info/time_within_train_step": 26.354800939559937, "step": 1160} +{"train_info/time_between_train_steps": 0.0038518905639648438, "step": 1160} +{"info/global_step": 1161, "train_info/time_within_train_step": 26.26593518257141, "step": 1161} +{"train_info/time_between_train_steps": 0.004079103469848633, "step": 1161} +{"info/global_step": 1162, "train_info/time_within_train_step": 26.375229120254517, "step": 1162} +{"train_info/time_between_train_steps": 0.003966331481933594, "step": 1162} +{"info/global_step": 1163, "train_info/time_within_train_step": 26.28188705444336, "step": 1163} +{"train_info/time_between_train_steps": 0.004015684127807617, "step": 1163} +{"info/global_step": 1164, "train_info/time_within_train_step": 26.34996223449707, "step": 1164} +{"train_info/time_between_train_steps": 0.00407099723815918, "step": 1164} +{"info/global_step": 1165, "train_info/time_within_train_step": 26.275843620300293, "step": 1165} +{"train_info/time_between_train_steps": 0.003954887390136719, "step": 1165} +{"info/global_step": 1166, "train_info/time_within_train_step": 26.34490656852722, "step": 1166} +{"train_info/time_between_train_steps": 0.0038301944732666016, "step": 1166} +{"info/global_step": 1167, "train_info/time_within_train_step": 26.27482557296753, "step": 1167} +{"train_info/time_between_train_steps": 0.003747224807739258, "step": 1167} +{"info/global_step": 1168, "train_info/time_within_train_step": 26.359145641326904, "step": 1168} +{"train_info/time_between_train_steps": 0.005743980407714844, "step": 1168} +{"info/global_step": 1169, "train_info/time_within_train_step": 26.417986631393433, "step": 1169} +{"train_info/time_between_train_steps": 0.0189516544342041, "step": 1169} +{"info/global_step": 1170, "train_info/time_within_train_step": 26.457237243652344, "step": 1170} +{"train_info/time_between_train_steps": 0.003816843032836914, "step": 1170} +{"info/global_step": 1171, "train_info/time_within_train_step": 26.256906270980835, "step": 1171} +{"train_info/time_between_train_steps": 0.0039055347442626953, "step": 1171} +{"info/global_step": 1172, "train_info/time_within_train_step": 26.298410654067993, "step": 1172} +{"train_info/time_between_train_steps": 0.0037925243377685547, "step": 1172} +{"info/global_step": 1173, "train_info/time_within_train_step": 26.25319218635559, "step": 1173} +{"train_info/time_between_train_steps": 0.003900289535522461, "step": 1173} +{"info/global_step": 1174, "train_info/time_within_train_step": 27.09635639190674, "step": 1174} +{"train_info/time_between_train_steps": 0.004014492034912109, "step": 1174} +{"info/global_step": 1175, "train_info/time_within_train_step": 27.282048225402832, "step": 1175} +{"train_info/time_between_train_steps": 0.003908634185791016, "step": 1175} +{"info/global_step": 1176, "train_info/time_within_train_step": 26.279173374176025, "step": 1176} +{"train_info/time_between_train_steps": 0.0038938522338867188, "step": 1176} +{"info/global_step": 1177, "train_info/time_within_train_step": 26.2911958694458, "step": 1177} +{"train_info/time_between_train_steps": 0.003919839859008789, "step": 1177} +{"info/global_step": 1178, "train_info/time_within_train_step": 26.267714023590088, "step": 1178} +{"train_info/time_between_train_steps": 0.003839731216430664, "step": 1178} +{"info/global_step": 1179, "train_info/time_within_train_step": 26.28272008895874, "step": 1179} +{"train_info/time_between_train_steps": 0.003784656524658203, "step": 1179} +{"info/global_step": 1180, "train_info/time_within_train_step": 26.252883911132812, "step": 1180} +{"train_info/time_between_train_steps": 0.0038437843322753906, "step": 1180} +{"info/global_step": 1181, "train_info/time_within_train_step": 26.2528657913208, "step": 1181} +{"train_info/time_between_train_steps": 0.003851175308227539, "step": 1181} +{"info/global_step": 1182, "train_info/time_within_train_step": 26.267505884170532, "step": 1182} +{"train_info/time_between_train_steps": 0.0038225650787353516, "step": 1182} +{"info/global_step": 1183, "train_info/time_within_train_step": 26.310956239700317, "step": 1183} +{"train_info/time_between_train_steps": 0.0038652420043945312, "step": 1183} +{"info/global_step": 1184, "train_info/time_within_train_step": 26.27114963531494, "step": 1184} +{"train_info/time_between_train_steps": 0.003847837448120117, "step": 1184} +{"info/global_step": 1185, "train_info/time_within_train_step": 26.30470299720764, "step": 1185} +{"train_info/time_between_train_steps": 0.003929615020751953, "step": 1185} +{"info/global_step": 1186, "train_info/time_within_train_step": 26.73023247718811, "step": 1186} +{"train_info/time_between_train_steps": 0.0042269229888916016, "step": 1186} +{"info/global_step": 1187, "train_info/time_within_train_step": 26.44282817840576, "step": 1187} +{"train_info/time_between_train_steps": 0.004560232162475586, "step": 1187} +{"info/global_step": 1188, "train_info/time_within_train_step": 26.82607674598694, "step": 1188} +{"train_info/time_between_train_steps": 0.004815340042114258, "step": 1188} +{"train_info/time_between_train_steps": 17.13468313217163, "step": 1188} +{"info/global_step": 1189, "train_info/time_within_train_step": 26.24376344680786, "step": 1189} +{"train_info/time_between_train_steps": 0.0038404464721679688, "step": 1189} +{"info/global_step": 1190, "train_info/time_within_train_step": 26.359416961669922, "step": 1190} +{"train_info/time_between_train_steps": 0.003822803497314453, "step": 1190} +{"info/global_step": 1191, "train_info/time_within_train_step": 26.253264904022217, "step": 1191} +{"train_info/time_between_train_steps": 0.0038938522338867188, "step": 1191} +{"info/global_step": 1192, "train_info/time_within_train_step": 26.366687297821045, "step": 1192} +{"train_info/time_between_train_steps": 0.003804922103881836, "step": 1192} +{"info/global_step": 1193, "train_info/time_within_train_step": 26.257583618164062, "step": 1193} +{"train_info/time_between_train_steps": 0.003941535949707031, "step": 1193} +{"info/global_step": 1194, "train_info/time_within_train_step": 26.34784698486328, "step": 1194} +{"train_info/time_between_train_steps": 0.003773212432861328, "step": 1194} +{"info/global_step": 1195, "train_info/time_within_train_step": 26.25899624824524, "step": 1195} +{"train_info/time_between_train_steps": 0.003793001174926758, "step": 1195} +{"info/global_step": 1196, "train_info/time_within_train_step": 26.3376624584198, "step": 1196} +{"train_info/time_between_train_steps": 0.0037598609924316406, "step": 1196} +{"info/global_step": 1197, "train_info/time_within_train_step": 26.26887011528015, "step": 1197} +{"train_info/time_between_train_steps": 0.0038857460021972656, "step": 1197} +{"info/global_step": 1198, "train_info/time_within_train_step": 26.336631536483765, "step": 1198} +{"train_info/time_between_train_steps": 0.003909111022949219, "step": 1198} +{"info/global_step": 1199, "train_info/time_within_train_step": 26.263514280319214, "step": 1199} +{"train_info/time_between_train_steps": 0.0038709640502929688, "step": 1199} +{"info/global_step": 1200, "train_info/time_within_train_step": 26.356619834899902, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18056.2958984375, "train_info/memory_reserved": 21960.0, "train_info/memory_max_reserved": 21960.0, "_timestamp": 1740958519, "_runtime": 33025}, "step": 1200} +{"logs": {"train/loss": 2.6559, "train/learning_rate": 0.0, "train/epoch": 33.01, "_timestamp": 1740958519, "_runtime": 33025}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18056.2958984375, "train_info/memory_reserved": 21960.0, "train_info/memory_max_reserved": 21960.0, "_timestamp": 1740958550, "_runtime": 33056}, "step": 1200} +{"logs": {"train/train_runtime": 33056.9033, "train/train_samples_per_second": 18.586, "train/train_steps_per_second": 0.036, "train/total_flos": 3.25835344576512e+17, "train/train_loss": 3.411376837094625, "train/epoch": 33.01, "_timestamp": 1740958550, "_runtime": 33056}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1581.5537109375, "train_info/memory_max_allocated": 18056.2958984375, "train_info/memory_reserved": 21960.0, "train_info/memory_max_reserved": 21960.0, "_timestamp": 1740958562, "_runtime": 33068}, "step": 1200} +{"logs": {"eval/loss": 3.483367919921875, "eval/runtime": 2.3188, "eval/samples_per_second": 56.925, "eval/steps_per_second": 3.881, "train/epoch": 33.01, "_timestamp": 1740958562, "_runtime": 33068}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1581.5537109375, "train_info/memory_max_allocated": 18056.2958984375, "train_info/memory_reserved": 21960.0, "train_info/memory_max_reserved": 21960.0, "_timestamp": 1740958563, "_runtime": 33069}, "step": 1200} +{"logs": {"eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 3.483367919921875, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 32.569228120787244, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 2.3188, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 56.925, "train/epoch": 33.01, "_timestamp": 1740958563, "_runtime": 33069}, "step": 1200} diff --git a/perturb_np_num_det_adj_zh_ZH_randinit_seed53.log b/perturb_np_num_det_adj_zh_ZH_randinit_seed53.log new file mode 100755 index 0000000000000000000000000000000000000000..3f74b76298f4bd4c37d741c0ad59046cbb126c86 --- /dev/null +++ b/perturb_np_num_det_adj_zh_ZH_randinit_seed53.log @@ -0,0 +1,121 @@ +|=>> 03/02 [15:17:46] - mistral - INFO :: Starting Run: perturb_np_num_det_adj_zh_ZH_randinit_seed53... +|=>> 03/02 [15:17:46] - mistral - INFO :: Setting Random Seed to 53! +|=>> 03/02 [15:17:46] - mistral - INFO :: Building Tokenize and Initializing `gpt2-small` via AutoModel/AutoConfig... +|=>> 03/02 [15:17:46] - mistral - INFO :: Using Configs For Model From: /scratch/xiulyang/multilingual-LM/mistral/conf/models/gpt2-small-ZH.json ... +|=>> 03/02 [15:17:46] - mistral.models.auto - INFO :: Building Hugging Face GPT2Config from provided configs: {'activation_function': 'gelu_new', 'architectures': ['GPT2LMHeadModel'], 'attn_pdrop': 0.1, 'embd_pdrop': 0.1, 'initializer_range': 0.02, 'layer_norm_epsilon': 1e-05, 'model_type': 'gpt2', 'n_ctx': 1024, 'n_embd': 768, 'n_head': 12, 'n_inner': None, 'n_layer': 12, 'n_positions': 1024, 'reorder_and_upcast_attn': True, 'resid_pdrop': 0.1, 'scale_attn_by_inverse_layer_idx': True, 'scale_attn_weights': True, 'summary_activation': None, 'summary_first_dropout': 0.2, 'summary_proj_to_labels': True, 'summary_type': 'cls_index', 'summary_use_proj': True, 'task_specific_params': {'text-generation': {'do_sample': True, 'max_length': 1024}}, 'torch_dtype': 'float32', 'transformers_version': '4.35.2', 'use_cache': False, 'vocab_size': 21128} ... +|=>> 03/02 [15:17:46] - mistral.models.auto - INFO :: Fetching Hugging Face [Fast] AutoTokenizer for Model: `gpt2`... +|=>> 03/02 [15:17:46] - mistral.models.auto - INFO :: Using a Pretokenized Dataset +|=>> 03/02 [15:17:46] - mistral.models.auto - INFO :: Initializing Custom GPT-2 Model from Configuration: `gpt2`... +|=>> 03/02 [15:17:49] - mistral - INFO :: Setting Training Arguments from Quinfig... +|=>> 03/02 [15:17:49] - mistral.args.training - INFO :: Setting Gradient Accumulation Steps = `64` [BSZ: 512 World Size: 1 Device BSZ: 8] +|=>> 03/02 [15:17:49] - mistral - INFO :: Downloading and Preprocessing Dataset `/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py`... +|=>> 03/02 [15:17:50] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Generating examples from = /scratch/xiulyang/multilingual-LM/data/multilingual/multilingual_data_perturbed/perturb_np_num_det_adj_zh/train +|=>> 03/02 [15:17:52] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Total sentences: 1059468 +|=>> 03/02 [15:17:52] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Loading pre-tokenized data +|=>> 03/02 [15:17:56] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Concatenating tokenized data using EOS token +|=>> 03/02 [15:17:56] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Chunking tokens into sublists of 1024 +|=>> 03/02 [15:17:57] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Writing dataset as space-separated sequences of tokens +|=>> 03/02 [15:18:02] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Generating examples from = /scratch/xiulyang/multilingual-LM/data/multilingual/multilingual_data_perturbed/perturb_np_num_det_adj_zh/dev +|=>> 03/02 [15:18:02] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Total sentences: 5553 +|=>> 03/02 [15:18:02] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Loading pre-tokenized data +|=>> 03/02 [15:18:02] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Concatenating tokenized data using EOS token +|=>> 03/02 [15:18:02] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Chunking tokens into sublists of 1024 +|=>> 03/02 [15:18:02] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Writing dataset as space-separated sequences of tokens +|=>> 03/02 [15:18:02] - mistral.corpora.auto - INFO :: Building Tokenized Indexed Dataset for {dataset_id}/{dataset_name}... +|=>> 03/02 [15:18:02] - mistral.corpora.auto - INFO :: Building Indexed Dataset for train +|=>> 03/02 [15:18:29] - mistral.corpora.auto - INFO :: Building Indexed Dataset for validation +|=>> 03/02 [15:18:30] - mistral - INFO :: Initializing Model Trainer... +|=>> 03/02 [15:18:30] - mistral - INFO :: Training Arguments: TrainingArguments( +_n_gpu=1, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +bf16=False, +bf16_full_eval=False, +data_seed=53, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=IntervalStrategy.STEPS, +fp16=True, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +gradient_accumulation_steps=64, +gradient_checkpointing=False, +greater_is_better=None, +group_by_length=False, +half_precision_backend=auto, +hub_model_id=None, +hub_strategy=HubStrategy.EVERY_SAVE, +hub_token=, +ignore_data_skip=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0006, +length_column_name=length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=logs, +logging_first_step=True, +logging_nan_inf_filter=True, +logging_steps=50, +logging_strategy=IntervalStrategy.STEPS, +lr_scheduler_type=SchedulerType.LINEAR, +max_grad_norm=1.0, +max_steps=1200, +metric_for_best_model=None, +mp_parameters=, +no_cuda=False, +num_train_epochs=3.0, +optim=OptimizerNames.ADAMW_HF, +output_dir=//scratch/xiulyang/multilingual_models/perturb_np_num_det_adj_zh_ZH_randinit/babylm_perturb_np_num_det_adj_zh_ZH_randinit_seed53/runs/perturb_np_num_det_adj_zh_ZH_randinit_seed53, +overwrite_output_dir=False, +past_index=-1, +per_device_eval_batch_size=16, +per_device_train_batch_size=8, +prediction_loss_only=True, +push_to_hub=False, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +remove_unused_columns=True, +report_to=[], +resume_from_checkpoint=None, +run_name=perturb_np_num_det_adj_zh_ZH_randinit_seed53, +save_on_each_node=False, +save_steps=1000, +save_strategy=IntervalStrategy.STEPS, +save_total_limit=None, +seed=53, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=120, +weight_decay=0.1, +xpu_backend=None, +) +|=>> 03/02 [15:18:39] - mistral.core.callbacks - INFO :: Setting W&B Project: xiulin-yang-compling +|=>> 03/02 [15:24:53] - mistral - INFO :: Training... +|=>> 03/02 [15:24:53] - mistral.core.callbacks - INFO :: Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" +|=>> 03/03 [00:36:00] - mistral - INFO :: ...and that's all folks! +|=>> 03/03 [00:36:00] - mistral - INFO :: Running final evaluation... diff --git a/pytorch_model.bin b/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..1fdfdbf52036d73260e04570717c1a6a23982713 --- /dev/null +++ b/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb4d6589696541b6a5d42066e70e1f0fd48588b2aa262cd7b5b9f593990c31c7 +size 420912233 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/training_args.bin b/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..5424cc46c818b611ae506108fe123bab21b5b22b --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f315e249c575e17de3e6961ca2ae12d127be6daa71a9eab261285cd82649d224 +size 3183