diff --git a/manual_upload/README.md b/manual_upload/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5849e32a79d280a97b1057c72c7d28de8856d1bd --- /dev/null +++ b/manual_upload/README.md @@ -0,0 +1,104 @@ +--- +tags: +- generated_from_trainer +model-index: +- name: elec-gmusic-familized-model-13-12__17-35-53 + results: [] +--- + + + +# elec-gmusic-familized-model-13-12__17-35-53 + +This model is a fine-tuned version of [JammyMachina/elec-gmusic-familized-model-13-12__17-35-53](https://huggingface.co/JammyMachina/elec-gmusic-familized-model-13-12__17-35-53) on an unknown dataset. +It achieves the following results on the evaluation set: +- Loss: 0.4303 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 5e-05 +- train_batch_size: 36 +- eval_batch_size: 32 +- seed: 42 +- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08 +- lr_scheduler_type: linear +- num_epochs: 5 +- mixed_precision_training: Native AMP + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | +|:-------------:|:-----:|:-----:|:---------------:| +| 0.2509 | 0.1 | 1024 | 0.4268 | +| 0.2521 | 0.2 | 2048 | 0.4284 | +| 0.2533 | 0.3 | 3072 | 0.4219 | +| 0.2517 | 0.4 | 4096 | 0.4245 | +| 0.2512 | 0.5 | 5120 | 0.4229 | +| 0.2506 | 0.6 | 6144 | 0.4191 | +| 0.2512 | 0.71 | 7168 | 0.4247 | +| 0.2483 | 0.81 | 8192 | 0.4239 | +| 0.2479 | 0.91 | 9216 | 0.4259 | +| 0.2498 | 1.01 | 10240 | 0.4262 | +| 0.2467 | 1.11 | 11264 | 0.4267 | +| 0.2466 | 1.21 | 12288 | 0.4263 | +| 0.2449 | 1.31 | 13312 | 0.4251 | +| 0.2452 | 1.41 | 14336 | 0.4274 | +| 0.2449 | 1.51 | 15360 | 0.4263 | +| 0.2444 | 1.61 | 16384 | 0.4240 | +| 0.2428 | 1.71 | 17408 | 0.4289 | +| 0.2425 | 1.81 | 18432 | 0.4229 | +| 0.2424 | 1.91 | 19456 | 0.4291 | +| 0.2422 | 2.01 | 20480 | 0.4247 | +| 0.2397 | 2.12 | 21504 | 0.4271 | +| 0.2397 | 2.22 | 22528 | 0.4226 | +| 0.2411 | 2.32 | 23552 | 0.4269 | +| 0.2408 | 2.42 | 24576 | 0.4288 | +| 0.2392 | 2.52 | 25600 | 0.4223 | +| 0.2391 | 2.62 | 26624 | 0.4297 | +| 0.2385 | 2.72 | 27648 | 0.4253 | +| 0.2371 | 2.82 | 28672 | 0.4297 | +| 0.2373 | 2.92 | 29696 | 0.4232 | +| 0.2368 | 3.02 | 30720 | 0.4296 | +| 0.2355 | 3.12 | 31744 | 0.4327 | +| 0.2354 | 3.22 | 32768 | 0.4305 | +| 0.2345 | 3.32 | 33792 | 0.4286 | +| 0.2355 | 3.42 | 34816 | 0.4350 | +| 0.2353 | 3.53 | 35840 | 0.4269 | +| 0.2351 | 3.63 | 36864 | 0.4301 | +| 0.2336 | 3.73 | 37888 | 0.4301 | +| 0.2344 | 3.83 | 38912 | 0.4319 | +| 0.2339 | 3.93 | 39936 | 0.4305 | +| 0.2326 | 4.03 | 40960 | 0.4298 | +| 0.2316 | 4.13 | 41984 | 0.4308 | +| 0.2311 | 4.23 | 43008 | 0.4330 | +| 0.2315 | 4.33 | 44032 | 0.4313 | +| 0.2305 | 4.43 | 45056 | 0.4319 | +| 0.2328 | 4.53 | 46080 | 0.4292 | +| 0.232 | 4.63 | 47104 | 0.4289 | +| 0.2309 | 4.73 | 48128 | 0.4303 | +| 0.23 | 4.83 | 49152 | 0.4317 | +| 0.2315 | 4.94 | 50176 | 0.4303 | + + +### Framework versions + +- Transformers 4.26.0.dev0 +- Pytorch 1.13.1+cu116 +- Datasets 2.7.1 +- Tokenizers 0.13.2 diff --git a/manual_upload/checkpoint-32768/config.json b/manual_upload/checkpoint-32768/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e5041639331370e15270cc287c7f7a8566c79f1a --- /dev/null +++ b/manual_upload/checkpoint-32768/config.json @@ -0,0 +1,33 @@ +{ + "_name_or_path": "JammyMachina/elec-gmusic-familized-model-13-12__17-35-53", + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_embd": 512, + "n_head": 8, + "n_inner": null, + "n_layer": 6, + "n_positions": 2048, + "pad_token_id": 1, + "reorder_and_upcast_attn": false, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": false, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.1, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "torch_dtype": "float32", + "transformers_version": "4.26.0.dev0", + "use_cache": true, + "vocab_size": 301 +} diff --git a/manual_upload/checkpoint-32768/optimizer.pt b/manual_upload/checkpoint-32768/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..386a44f7eb82acbf387f72259cee35c45ac127f9 --- /dev/null +++ b/manual_upload/checkpoint-32768/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:458ee72534eb8b5ad8dccb794450b446bad2494326bb8beefb83ed28205a0db6 +size 160988613 diff --git a/manual_upload/checkpoint-32768/pytorch_model.bin b/manual_upload/checkpoint-32768/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..219e677ef02bdd88d0f162ac31f3cf9b5b987a2f --- /dev/null +++ b/manual_upload/checkpoint-32768/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:188a30cd4df09ff71af7227dcd5594d5c65b1afce81bdb7613c8f8b0eee22f06 +size 105666297 diff --git a/manual_upload/checkpoint-32768/rng_state.pth b/manual_upload/checkpoint-32768/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..6213d922e72fe76f1818104cd9ad08540663d70c --- /dev/null +++ b/manual_upload/checkpoint-32768/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5139f2582c3e753523ce47e10d63506fe8b53897ec575066a8f405daa71ebee5 +size 17641 diff --git a/manual_upload/checkpoint-32768/scaler.pt b/manual_upload/checkpoint-32768/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5fd0268fbbbfcb3d4f500a338a3bfb46242dc746 --- /dev/null +++ b/manual_upload/checkpoint-32768/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cac2d928598cb5138c4a59645b28bcace0c1fd92792906a6e7ae72f6c4058c01 +size 557 diff --git a/manual_upload/checkpoint-32768/scheduler.pt b/manual_upload/checkpoint-32768/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..bef93fdacaff563b712e135060d161851f30b7c4 --- /dev/null +++ b/manual_upload/checkpoint-32768/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab7e2068bb82ba682bdcc93651dcef0e77a4e1d68e53aa617acce19bdbe0f064 +size 627 diff --git a/manual_upload/checkpoint-32768/trainer_state.json b/manual_upload/checkpoint-32768/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9f244a50c2dfb69927b94b8b4e635fe3fec2c70d --- /dev/null +++ b/manual_upload/checkpoint-32768/trainer_state.json @@ -0,0 +1,464 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.2229762958591524, + "global_step": 32768, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.1, + "learning_rate": 4.899281990754402e-05, + "loss": 0.2509, + "step": 1024 + }, + { + "epoch": 0.1, + "eval_loss": 0.42676058411598206, + "eval_runtime": 32.3598, + "eval_samples_per_second": 106.212, + "eval_steps_per_second": 3.337, + "step": 1024 + }, + { + "epoch": 0.2, + "learning_rate": 4.798563981508803e-05, + "loss": 0.2521, + "step": 2048 + }, + { + "epoch": 0.2, + "eval_loss": 0.4283539652824402, + "eval_runtime": 32.4193, + "eval_samples_per_second": 106.017, + "eval_steps_per_second": 3.331, + "step": 2048 + }, + { + "epoch": 0.3, + "learning_rate": 4.697845972263204e-05, + "loss": 0.2533, + "step": 3072 + }, + { + "epoch": 0.3, + "eval_loss": 0.42187923192977905, + "eval_runtime": 32.3798, + "eval_samples_per_second": 106.146, + "eval_steps_per_second": 3.335, + "step": 3072 + }, + { + "epoch": 0.4, + "learning_rate": 4.597127963017606e-05, + "loss": 0.2517, + "step": 4096 + }, + { + "epoch": 0.4, + "eval_loss": 0.42451202869415283, + "eval_runtime": 32.3385, + "eval_samples_per_second": 106.282, + "eval_steps_per_second": 3.34, + "step": 4096 + }, + { + "epoch": 0.5, + "learning_rate": 4.496409953772008e-05, + "loss": 0.2512, + "step": 5120 + }, + { + "epoch": 0.5, + "eval_loss": 0.42285656929016113, + "eval_runtime": 32.3235, + "eval_samples_per_second": 106.331, + "eval_steps_per_second": 3.341, + "step": 5120 + }, + { + "epoch": 0.6, + "learning_rate": 4.3956919445264097e-05, + "loss": 0.2506, + "step": 6144 + }, + { + "epoch": 0.6, + "eval_loss": 0.4190705716609955, + "eval_runtime": 32.4437, + "eval_samples_per_second": 105.937, + "eval_steps_per_second": 3.329, + "step": 6144 + }, + { + "epoch": 0.71, + "learning_rate": 4.294973935280811e-05, + "loss": 0.2512, + "step": 7168 + }, + { + "epoch": 0.71, + "eval_loss": 0.4247213900089264, + "eval_runtime": 32.3289, + "eval_samples_per_second": 106.314, + "eval_steps_per_second": 3.341, + "step": 7168 + }, + { + "epoch": 0.81, + "learning_rate": 4.194255926035212e-05, + "loss": 0.2483, + "step": 8192 + }, + { + "epoch": 0.81, + "eval_loss": 0.4238651394844055, + "eval_runtime": 32.3862, + "eval_samples_per_second": 106.126, + "eval_steps_per_second": 3.335, + "step": 8192 + }, + { + "epoch": 0.91, + "learning_rate": 4.093537916789614e-05, + "loss": 0.2479, + "step": 9216 + }, + { + "epoch": 0.91, + "eval_loss": 0.4259129762649536, + "eval_runtime": 32.4627, + "eval_samples_per_second": 105.875, + "eval_steps_per_second": 3.327, + "step": 9216 + }, + { + "epoch": 1.01, + "learning_rate": 3.9928199075440155e-05, + "loss": 0.2498, + "step": 10240 + }, + { + "epoch": 1.01, + "eval_loss": 0.4262418746948242, + "eval_runtime": 32.3999, + "eval_samples_per_second": 106.081, + "eval_steps_per_second": 3.333, + "step": 10240 + }, + { + "epoch": 1.11, + "learning_rate": 3.8921018982984166e-05, + "loss": 0.2467, + "step": 11264 + }, + { + "epoch": 1.11, + "eval_loss": 0.4267333149909973, + "eval_runtime": 32.3308, + "eval_samples_per_second": 106.307, + "eval_steps_per_second": 3.34, + "step": 11264 + }, + { + "epoch": 1.21, + "learning_rate": 3.791482246483722e-05, + "loss": 0.2466, + "step": 12288 + }, + { + "epoch": 1.21, + "eval_loss": 0.4263165295124054, + "eval_runtime": 32.4348, + "eval_samples_per_second": 105.967, + "eval_steps_per_second": 3.33, + "step": 12288 + }, + { + "epoch": 1.31, + "learning_rate": 3.690764237238124e-05, + "loss": 0.2449, + "step": 13312 + }, + { + "epoch": 1.31, + "eval_loss": 0.42505738139152527, + "eval_runtime": 32.3762, + "eval_samples_per_second": 106.158, + "eval_steps_per_second": 3.336, + "step": 13312 + }, + { + "epoch": 1.41, + "learning_rate": 3.590144585423429e-05, + "loss": 0.2452, + "step": 14336 + }, + { + "epoch": 1.41, + "eval_loss": 0.42740598320961, + "eval_runtime": 32.44, + "eval_samples_per_second": 105.949, + "eval_steps_per_second": 3.329, + "step": 14336 + }, + { + "epoch": 1.51, + "learning_rate": 3.489426576177831e-05, + "loss": 0.2449, + "step": 15360 + }, + { + "epoch": 1.51, + "eval_loss": 0.42628249526023865, + "eval_runtime": 32.3172, + "eval_samples_per_second": 106.352, + "eval_steps_per_second": 3.342, + "step": 15360 + }, + { + "epoch": 1.61, + "learning_rate": 3.388708566932232e-05, + "loss": 0.2444, + "step": 16384 + }, + { + "epoch": 1.61, + "eval_loss": 0.42398524284362793, + "eval_runtime": 32.2909, + "eval_samples_per_second": 106.439, + "eval_steps_per_second": 3.345, + "step": 16384 + }, + { + "epoch": 1.71, + "learning_rate": 3.287990557686633e-05, + "loss": 0.2428, + "step": 17408 + }, + { + "epoch": 1.71, + "eval_loss": 0.42891454696655273, + "eval_runtime": 32.3773, + "eval_samples_per_second": 106.155, + "eval_steps_per_second": 3.336, + "step": 17408 + }, + { + "epoch": 1.81, + "learning_rate": 3.1873709058719384e-05, + "loss": 0.2425, + "step": 18432 + }, + { + "epoch": 1.81, + "eval_loss": 0.4228712022304535, + "eval_runtime": 32.4341, + "eval_samples_per_second": 105.969, + "eval_steps_per_second": 3.33, + "step": 18432 + }, + { + "epoch": 1.91, + "learning_rate": 3.08665289662634e-05, + "loss": 0.2424, + "step": 19456 + }, + { + "epoch": 1.91, + "eval_loss": 0.4291061758995056, + "eval_runtime": 32.3192, + "eval_samples_per_second": 106.345, + "eval_steps_per_second": 3.342, + "step": 19456 + }, + { + "epoch": 2.01, + "learning_rate": 2.985934887380742e-05, + "loss": 0.2422, + "step": 20480 + }, + { + "epoch": 2.01, + "eval_loss": 0.4246675968170166, + "eval_runtime": 32.2862, + "eval_samples_per_second": 106.454, + "eval_steps_per_second": 3.345, + "step": 20480 + }, + { + "epoch": 2.12, + "learning_rate": 2.8853152355660473e-05, + "loss": 0.2397, + "step": 21504 + }, + { + "epoch": 2.12, + "eval_loss": 0.42707231640815735, + "eval_runtime": 32.3373, + "eval_samples_per_second": 106.286, + "eval_steps_per_second": 3.34, + "step": 21504 + }, + { + "epoch": 2.22, + "learning_rate": 2.7846955837513527e-05, + "loss": 0.2397, + "step": 22528 + }, + { + "epoch": 2.22, + "eval_loss": 0.42262786626815796, + "eval_runtime": 32.3328, + "eval_samples_per_second": 106.301, + "eval_steps_per_second": 3.34, + "step": 22528 + }, + { + "epoch": 2.32, + "learning_rate": 2.6839775745057538e-05, + "loss": 0.2411, + "step": 23552 + }, + { + "epoch": 2.32, + "eval_loss": 0.42685696482658386, + "eval_runtime": 32.3962, + "eval_samples_per_second": 106.093, + "eval_steps_per_second": 3.334, + "step": 23552 + }, + { + "epoch": 2.42, + "learning_rate": 2.5832595652601556e-05, + "loss": 0.2408, + "step": 24576 + }, + { + "epoch": 2.42, + "eval_loss": 0.42877742648124695, + "eval_runtime": 32.3163, + "eval_samples_per_second": 106.355, + "eval_steps_per_second": 3.342, + "step": 24576 + }, + { + "epoch": 2.52, + "learning_rate": 2.482541556014557e-05, + "loss": 0.2392, + "step": 25600 + }, + { + "epoch": 2.52, + "eval_loss": 0.42227810621261597, + "eval_runtime": 32.369, + "eval_samples_per_second": 106.182, + "eval_steps_per_second": 3.337, + "step": 25600 + }, + { + "epoch": 2.62, + "learning_rate": 2.3819219041998624e-05, + "loss": 0.2391, + "step": 26624 + }, + { + "epoch": 2.62, + "eval_loss": 0.4296777546405792, + "eval_runtime": 32.4315, + "eval_samples_per_second": 105.977, + "eval_steps_per_second": 3.33, + "step": 26624 + }, + { + "epoch": 2.72, + "learning_rate": 2.2812038949542638e-05, + "loss": 0.2385, + "step": 27648 + }, + { + "epoch": 2.72, + "eval_loss": 0.4252742528915405, + "eval_runtime": 32.4362, + "eval_samples_per_second": 105.962, + "eval_steps_per_second": 3.33, + "step": 27648 + }, + { + "epoch": 2.82, + "learning_rate": 2.180584243139569e-05, + "loss": 0.2371, + "step": 28672 + }, + { + "epoch": 2.82, + "eval_loss": 0.42966845631599426, + "eval_runtime": 32.3834, + "eval_samples_per_second": 106.135, + "eval_steps_per_second": 3.335, + "step": 28672 + }, + { + "epoch": 2.92, + "learning_rate": 2.079866233893971e-05, + "loss": 0.2373, + "step": 29696 + }, + { + "epoch": 2.92, + "eval_loss": 0.4231690466403961, + "eval_runtime": 32.3708, + "eval_samples_per_second": 106.176, + "eval_steps_per_second": 3.336, + "step": 29696 + }, + { + "epoch": 3.02, + "learning_rate": 1.97934493951018e-05, + "loss": 0.2368, + "step": 30720 + }, + { + "epoch": 3.02, + "eval_loss": 0.42956846952438354, + "eval_runtime": 32.3442, + "eval_samples_per_second": 106.263, + "eval_steps_per_second": 3.339, + "step": 30720 + }, + { + "epoch": 3.12, + "learning_rate": 1.8786269302645816e-05, + "loss": 0.2355, + "step": 31744 + }, + { + "epoch": 3.12, + "eval_loss": 0.43274641036987305, + "eval_runtime": 32.3365, + "eval_samples_per_second": 106.289, + "eval_steps_per_second": 3.34, + "step": 31744 + }, + { + "epoch": 3.22, + "learning_rate": 1.777908921018983e-05, + "loss": 0.2354, + "step": 32768 + }, + { + "epoch": 3.22, + "eval_loss": 0.4304845929145813, + "eval_runtime": 32.2799, + "eval_samples_per_second": 106.475, + "eval_steps_per_second": 3.346, + "step": 32768 + } + ], + "max_steps": 50835, + "num_train_epochs": 5, + "total_flos": 2.7417833655415603e+17, + "trial_name": null, + "trial_params": null +} diff --git a/manual_upload/checkpoint-32768/training_args.bin b/manual_upload/checkpoint-32768/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..fe0b86ed4a6ab1c86797fadcaa43d46ee74f7857 --- /dev/null +++ b/manual_upload/checkpoint-32768/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdf489f059ab44f9bc26200d314d3fc5954dabfb501b51ecc19cdc4d4be8a527 +size 3579 diff --git a/manual_upload/checkpoint-36864/config.json b/manual_upload/checkpoint-36864/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e5041639331370e15270cc287c7f7a8566c79f1a --- /dev/null +++ b/manual_upload/checkpoint-36864/config.json @@ -0,0 +1,33 @@ +{ + "_name_or_path": "JammyMachina/elec-gmusic-familized-model-13-12__17-35-53", + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_embd": 512, + "n_head": 8, + "n_inner": null, + "n_layer": 6, + "n_positions": 2048, + "pad_token_id": 1, + "reorder_and_upcast_attn": false, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": false, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.1, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "torch_dtype": "float32", + "transformers_version": "4.26.0.dev0", + "use_cache": true, + "vocab_size": 301 +} diff --git a/manual_upload/checkpoint-36864/optimizer.pt b/manual_upload/checkpoint-36864/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0cf703c8ee3f8e53c7b3950768ad5ee32a65424d --- /dev/null +++ b/manual_upload/checkpoint-36864/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ecad87a8937c4d031e8a7c3c520eb1b72e462241910d2d3263324e1dabdf805 +size 160988613 diff --git a/manual_upload/checkpoint-36864/pytorch_model.bin b/manual_upload/checkpoint-36864/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..b79c317c83fb4000d7ab2d1af97fb75bfbedfbc3 --- /dev/null +++ b/manual_upload/checkpoint-36864/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f4e7b120db5f35f11faa3adbf92eb433f85fd1655d23f2fa34efb2f6ca0797f +size 105666297 diff --git a/manual_upload/checkpoint-36864/rng_state.pth b/manual_upload/checkpoint-36864/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..721bf24945718eb6eca3a0d9bfc57d78967ce670 --- /dev/null +++ b/manual_upload/checkpoint-36864/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e6ad785729770a83edfc217714d13846c8593f7224b6b958900618c84c55687 +size 17641 diff --git a/manual_upload/checkpoint-36864/scaler.pt b/manual_upload/checkpoint-36864/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b03341e81f037d5ad242fcad5a5f4c3e16ea1e8 --- /dev/null +++ b/manual_upload/checkpoint-36864/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:715831744278105fa1b716428f132c19d1f2e1ee6185f8f11745d5beb99422d6 +size 557 diff --git a/manual_upload/checkpoint-36864/scheduler.pt b/manual_upload/checkpoint-36864/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..bd6cbd921ca3ea2bc83ea452c5c7c279cf6c9f1d --- /dev/null +++ b/manual_upload/checkpoint-36864/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66adcb2e982eff562b9d0b90f4e01c5bf86d5d64f89c61e617b491e5417a40f5 +size 627 diff --git a/manual_upload/checkpoint-36864/trainer_state.json b/manual_upload/checkpoint-36864/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..523cda5d5ab4351d1cc79949bb51026346d99dc6 --- /dev/null +++ b/manual_upload/checkpoint-36864/trainer_state.json @@ -0,0 +1,520 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.6258483328415463, + "global_step": 36864, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.1, + "learning_rate": 4.899281990754402e-05, + "loss": 0.2509, + "step": 1024 + }, + { + "epoch": 0.1, + "eval_loss": 0.42676058411598206, + "eval_runtime": 32.3598, + "eval_samples_per_second": 106.212, + "eval_steps_per_second": 3.337, + "step": 1024 + }, + { + "epoch": 0.2, + "learning_rate": 4.798563981508803e-05, + "loss": 0.2521, + "step": 2048 + }, + { + "epoch": 0.2, + "eval_loss": 0.4283539652824402, + "eval_runtime": 32.4193, + "eval_samples_per_second": 106.017, + "eval_steps_per_second": 3.331, + "step": 2048 + }, + { + "epoch": 0.3, + "learning_rate": 4.697845972263204e-05, + "loss": 0.2533, + "step": 3072 + }, + { + "epoch": 0.3, + "eval_loss": 0.42187923192977905, + "eval_runtime": 32.3798, + "eval_samples_per_second": 106.146, + "eval_steps_per_second": 3.335, + "step": 3072 + }, + { + "epoch": 0.4, + "learning_rate": 4.597127963017606e-05, + "loss": 0.2517, + "step": 4096 + }, + { + "epoch": 0.4, + "eval_loss": 0.42451202869415283, + "eval_runtime": 32.3385, + "eval_samples_per_second": 106.282, + "eval_steps_per_second": 3.34, + "step": 4096 + }, + { + "epoch": 0.5, + "learning_rate": 4.496409953772008e-05, + "loss": 0.2512, + "step": 5120 + }, + { + "epoch": 0.5, + "eval_loss": 0.42285656929016113, + "eval_runtime": 32.3235, + "eval_samples_per_second": 106.331, + "eval_steps_per_second": 3.341, + "step": 5120 + }, + { + "epoch": 0.6, + "learning_rate": 4.3956919445264097e-05, + "loss": 0.2506, + "step": 6144 + }, + { + "epoch": 0.6, + "eval_loss": 0.4190705716609955, + "eval_runtime": 32.4437, + "eval_samples_per_second": 105.937, + "eval_steps_per_second": 3.329, + "step": 6144 + }, + { + "epoch": 0.71, + "learning_rate": 4.294973935280811e-05, + "loss": 0.2512, + "step": 7168 + }, + { + "epoch": 0.71, + "eval_loss": 0.4247213900089264, + "eval_runtime": 32.3289, + "eval_samples_per_second": 106.314, + "eval_steps_per_second": 3.341, + "step": 7168 + }, + { + "epoch": 0.81, + "learning_rate": 4.194255926035212e-05, + "loss": 0.2483, + "step": 8192 + }, + { + "epoch": 0.81, + "eval_loss": 0.4238651394844055, + "eval_runtime": 32.3862, + "eval_samples_per_second": 106.126, + "eval_steps_per_second": 3.335, + "step": 8192 + }, + { + "epoch": 0.91, + "learning_rate": 4.093537916789614e-05, + "loss": 0.2479, + "step": 9216 + }, + { + "epoch": 0.91, + "eval_loss": 0.4259129762649536, + "eval_runtime": 32.4627, + "eval_samples_per_second": 105.875, + "eval_steps_per_second": 3.327, + "step": 9216 + }, + { + "epoch": 1.01, + "learning_rate": 3.9928199075440155e-05, + "loss": 0.2498, + "step": 10240 + }, + { + "epoch": 1.01, + "eval_loss": 0.4262418746948242, + "eval_runtime": 32.3999, + "eval_samples_per_second": 106.081, + "eval_steps_per_second": 3.333, + "step": 10240 + }, + { + "epoch": 1.11, + "learning_rate": 3.8921018982984166e-05, + "loss": 0.2467, + "step": 11264 + }, + { + "epoch": 1.11, + "eval_loss": 0.4267333149909973, + "eval_runtime": 32.3308, + "eval_samples_per_second": 106.307, + "eval_steps_per_second": 3.34, + "step": 11264 + }, + { + "epoch": 1.21, + "learning_rate": 3.791482246483722e-05, + "loss": 0.2466, + "step": 12288 + }, + { + "epoch": 1.21, + "eval_loss": 0.4263165295124054, + "eval_runtime": 32.4348, + "eval_samples_per_second": 105.967, + "eval_steps_per_second": 3.33, + "step": 12288 + }, + { + "epoch": 1.31, + "learning_rate": 3.690764237238124e-05, + "loss": 0.2449, + "step": 13312 + }, + { + "epoch": 1.31, + "eval_loss": 0.42505738139152527, + "eval_runtime": 32.3762, + "eval_samples_per_second": 106.158, + "eval_steps_per_second": 3.336, + "step": 13312 + }, + { + "epoch": 1.41, + "learning_rate": 3.590144585423429e-05, + "loss": 0.2452, + "step": 14336 + }, + { + "epoch": 1.41, + "eval_loss": 0.42740598320961, + "eval_runtime": 32.44, + "eval_samples_per_second": 105.949, + "eval_steps_per_second": 3.329, + "step": 14336 + }, + { + "epoch": 1.51, + "learning_rate": 3.489426576177831e-05, + "loss": 0.2449, + "step": 15360 + }, + { + "epoch": 1.51, + "eval_loss": 0.42628249526023865, + "eval_runtime": 32.3172, + "eval_samples_per_second": 106.352, + "eval_steps_per_second": 3.342, + "step": 15360 + }, + { + "epoch": 1.61, + "learning_rate": 3.388708566932232e-05, + "loss": 0.2444, + "step": 16384 + }, + { + "epoch": 1.61, + "eval_loss": 0.42398524284362793, + "eval_runtime": 32.2909, + "eval_samples_per_second": 106.439, + "eval_steps_per_second": 3.345, + "step": 16384 + }, + { + "epoch": 1.71, + "learning_rate": 3.287990557686633e-05, + "loss": 0.2428, + "step": 17408 + }, + { + "epoch": 1.71, + "eval_loss": 0.42891454696655273, + "eval_runtime": 32.3773, + "eval_samples_per_second": 106.155, + "eval_steps_per_second": 3.336, + "step": 17408 + }, + { + "epoch": 1.81, + "learning_rate": 3.1873709058719384e-05, + "loss": 0.2425, + "step": 18432 + }, + { + "epoch": 1.81, + "eval_loss": 0.4228712022304535, + "eval_runtime": 32.4341, + "eval_samples_per_second": 105.969, + "eval_steps_per_second": 3.33, + "step": 18432 + }, + { + "epoch": 1.91, + "learning_rate": 3.08665289662634e-05, + "loss": 0.2424, + "step": 19456 + }, + { + "epoch": 1.91, + "eval_loss": 0.4291061758995056, + "eval_runtime": 32.3192, + "eval_samples_per_second": 106.345, + "eval_steps_per_second": 3.342, + "step": 19456 + }, + { + "epoch": 2.01, + "learning_rate": 2.985934887380742e-05, + "loss": 0.2422, + "step": 20480 + }, + { + "epoch": 2.01, + "eval_loss": 0.4246675968170166, + "eval_runtime": 32.2862, + "eval_samples_per_second": 106.454, + "eval_steps_per_second": 3.345, + "step": 20480 + }, + { + "epoch": 2.12, + "learning_rate": 2.8853152355660473e-05, + "loss": 0.2397, + "step": 21504 + }, + { + "epoch": 2.12, + "eval_loss": 0.42707231640815735, + "eval_runtime": 32.3373, + "eval_samples_per_second": 106.286, + "eval_steps_per_second": 3.34, + "step": 21504 + }, + { + "epoch": 2.22, + "learning_rate": 2.7846955837513527e-05, + "loss": 0.2397, + "step": 22528 + }, + { + "epoch": 2.22, + "eval_loss": 0.42262786626815796, + "eval_runtime": 32.3328, + "eval_samples_per_second": 106.301, + "eval_steps_per_second": 3.34, + "step": 22528 + }, + { + "epoch": 2.32, + "learning_rate": 2.6839775745057538e-05, + "loss": 0.2411, + "step": 23552 + }, + { + "epoch": 2.32, + "eval_loss": 0.42685696482658386, + "eval_runtime": 32.3962, + "eval_samples_per_second": 106.093, + "eval_steps_per_second": 3.334, + "step": 23552 + }, + { + "epoch": 2.42, + "learning_rate": 2.5832595652601556e-05, + "loss": 0.2408, + "step": 24576 + }, + { + "epoch": 2.42, + "eval_loss": 0.42877742648124695, + "eval_runtime": 32.3163, + "eval_samples_per_second": 106.355, + "eval_steps_per_second": 3.342, + "step": 24576 + }, + { + "epoch": 2.52, + "learning_rate": 2.482541556014557e-05, + "loss": 0.2392, + "step": 25600 + }, + { + "epoch": 2.52, + "eval_loss": 0.42227810621261597, + "eval_runtime": 32.369, + "eval_samples_per_second": 106.182, + "eval_steps_per_second": 3.337, + "step": 25600 + }, + { + "epoch": 2.62, + "learning_rate": 2.3819219041998624e-05, + "loss": 0.2391, + "step": 26624 + }, + { + "epoch": 2.62, + "eval_loss": 0.4296777546405792, + "eval_runtime": 32.4315, + "eval_samples_per_second": 105.977, + "eval_steps_per_second": 3.33, + "step": 26624 + }, + { + "epoch": 2.72, + "learning_rate": 2.2812038949542638e-05, + "loss": 0.2385, + "step": 27648 + }, + { + "epoch": 2.72, + "eval_loss": 0.4252742528915405, + "eval_runtime": 32.4362, + "eval_samples_per_second": 105.962, + "eval_steps_per_second": 3.33, + "step": 27648 + }, + { + "epoch": 2.82, + "learning_rate": 2.180584243139569e-05, + "loss": 0.2371, + "step": 28672 + }, + { + "epoch": 2.82, + "eval_loss": 0.42966845631599426, + "eval_runtime": 32.3834, + "eval_samples_per_second": 106.135, + "eval_steps_per_second": 3.335, + "step": 28672 + }, + { + "epoch": 2.92, + "learning_rate": 2.079866233893971e-05, + "loss": 0.2373, + "step": 29696 + }, + { + "epoch": 2.92, + "eval_loss": 0.4231690466403961, + "eval_runtime": 32.3708, + "eval_samples_per_second": 106.176, + "eval_steps_per_second": 3.336, + "step": 29696 + }, + { + "epoch": 3.02, + "learning_rate": 1.97934493951018e-05, + "loss": 0.2368, + "step": 30720 + }, + { + "epoch": 3.02, + "eval_loss": 0.42956846952438354, + "eval_runtime": 32.3442, + "eval_samples_per_second": 106.263, + "eval_steps_per_second": 3.339, + "step": 30720 + }, + { + "epoch": 3.12, + "learning_rate": 1.8786269302645816e-05, + "loss": 0.2355, + "step": 31744 + }, + { + "epoch": 3.12, + "eval_loss": 0.43274641036987305, + "eval_runtime": 32.3365, + "eval_samples_per_second": 106.289, + "eval_steps_per_second": 3.34, + "step": 31744 + }, + { + "epoch": 3.22, + "learning_rate": 1.777908921018983e-05, + "loss": 0.2354, + "step": 32768 + }, + { + "epoch": 3.22, + "eval_loss": 0.4304845929145813, + "eval_runtime": 32.2799, + "eval_samples_per_second": 106.475, + "eval_steps_per_second": 3.346, + "step": 32768 + }, + { + "epoch": 3.32, + "learning_rate": 1.6771909117733845e-05, + "loss": 0.2345, + "step": 33792 + }, + { + "epoch": 3.32, + "eval_loss": 0.4286292791366577, + "eval_runtime": 32.4389, + "eval_samples_per_second": 105.953, + "eval_steps_per_second": 3.329, + "step": 33792 + }, + { + "epoch": 3.42, + "learning_rate": 1.5765712599586898e-05, + "loss": 0.2355, + "step": 34816 + }, + { + "epoch": 3.42, + "eval_loss": 0.4350430965423584, + "eval_runtime": 32.3371, + "eval_samples_per_second": 106.287, + "eval_steps_per_second": 3.34, + "step": 34816 + }, + { + "epoch": 3.53, + "learning_rate": 1.4758532507130915e-05, + "loss": 0.2353, + "step": 35840 + }, + { + "epoch": 3.53, + "eval_loss": 0.4268806278705597, + "eval_runtime": 32.3956, + "eval_samples_per_second": 106.095, + "eval_steps_per_second": 3.334, + "step": 35840 + }, + { + "epoch": 3.63, + "learning_rate": 1.375233598898397e-05, + "loss": 0.2351, + "step": 36864 + }, + { + "epoch": 3.63, + "eval_loss": 0.43005427718162537, + "eval_runtime": 32.3262, + "eval_samples_per_second": 106.323, + "eval_steps_per_second": 3.341, + "step": 36864 + } + ], + "max_steps": 50835, + "num_train_epochs": 5, + "total_flos": 3.084517632593756e+17, + "trial_name": null, + "trial_params": null +} diff --git a/manual_upload/checkpoint-36864/training_args.bin b/manual_upload/checkpoint-36864/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..fe0b86ed4a6ab1c86797fadcaa43d46ee74f7857 --- /dev/null +++ b/manual_upload/checkpoint-36864/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdf489f059ab44f9bc26200d314d3fc5954dabfb501b51ecc19cdc4d4be8a527 +size 3579 diff --git a/manual_upload/checkpoint-40960/config.json b/manual_upload/checkpoint-40960/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e5041639331370e15270cc287c7f7a8566c79f1a --- /dev/null +++ b/manual_upload/checkpoint-40960/config.json @@ -0,0 +1,33 @@ +{ + "_name_or_path": "JammyMachina/elec-gmusic-familized-model-13-12__17-35-53", + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_embd": 512, + "n_head": 8, + "n_inner": null, + "n_layer": 6, + "n_positions": 2048, + "pad_token_id": 1, + "reorder_and_upcast_attn": false, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": false, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.1, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "torch_dtype": "float32", + "transformers_version": "4.26.0.dev0", + "use_cache": true, + "vocab_size": 301 +} diff --git a/manual_upload/checkpoint-40960/optimizer.pt b/manual_upload/checkpoint-40960/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..29ad0bcf45ce81bc4640db4e28be2fb59771090d --- /dev/null +++ b/manual_upload/checkpoint-40960/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:194d9ac39fd6dd0aaf4f3a0f1d87e4e89c633f97b4d433de39e518540f86b6b0 +size 160988613 diff --git a/manual_upload/checkpoint-40960/pytorch_model.bin b/manual_upload/checkpoint-40960/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..26611e9ba2c496b0c107829aa8d1064d70e5ff9c --- /dev/null +++ b/manual_upload/checkpoint-40960/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8efb0bfe8122bfb4130dae55d638dc709a3ecb9c674f8fac344a75876a3f5bfc +size 105666297 diff --git a/manual_upload/checkpoint-40960/rng_state.pth b/manual_upload/checkpoint-40960/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c2b5d7f3de6155378d3bc68cc230c338be20569 --- /dev/null +++ b/manual_upload/checkpoint-40960/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e855b6afcb0da09e80c4b9468135e5f0c700df448590420b405f2eca6b418a8b +size 17641 diff --git a/manual_upload/checkpoint-40960/scaler.pt b/manual_upload/checkpoint-40960/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..567ac39423a405b6163790004070a946a0ebdef1 --- /dev/null +++ b/manual_upload/checkpoint-40960/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c25c6f024cb9b63e362884769ba537bb0d48ae621b9dd00c56115f2a525a670 +size 557 diff --git a/manual_upload/checkpoint-40960/scheduler.pt b/manual_upload/checkpoint-40960/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..364ea333ca4c04b3cace27b4deeb78f6eeb581d9 --- /dev/null +++ b/manual_upload/checkpoint-40960/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3f56085a1a47b3b808c6a2c228c3ef37976e4c202f414e76879f0ccc3f1c700 +size 627 diff --git a/manual_upload/checkpoint-40960/trainer_state.json b/manual_upload/checkpoint-40960/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e49d044e73fe31c7eca3c28ede14309f9d151fea --- /dev/null +++ b/manual_upload/checkpoint-40960/trainer_state.json @@ -0,0 +1,576 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.02872036982394, + "global_step": 40960, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.1, + "learning_rate": 4.899281990754402e-05, + "loss": 0.2509, + "step": 1024 + }, + { + "epoch": 0.1, + "eval_loss": 0.42676058411598206, + "eval_runtime": 32.3598, + "eval_samples_per_second": 106.212, + "eval_steps_per_second": 3.337, + "step": 1024 + }, + { + "epoch": 0.2, + "learning_rate": 4.798563981508803e-05, + "loss": 0.2521, + "step": 2048 + }, + { + "epoch": 0.2, + "eval_loss": 0.4283539652824402, + "eval_runtime": 32.4193, + "eval_samples_per_second": 106.017, + "eval_steps_per_second": 3.331, + "step": 2048 + }, + { + "epoch": 0.3, + "learning_rate": 4.697845972263204e-05, + "loss": 0.2533, + "step": 3072 + }, + { + "epoch": 0.3, + "eval_loss": 0.42187923192977905, + "eval_runtime": 32.3798, + "eval_samples_per_second": 106.146, + "eval_steps_per_second": 3.335, + "step": 3072 + }, + { + "epoch": 0.4, + "learning_rate": 4.597127963017606e-05, + "loss": 0.2517, + "step": 4096 + }, + { + "epoch": 0.4, + "eval_loss": 0.42451202869415283, + "eval_runtime": 32.3385, + "eval_samples_per_second": 106.282, + "eval_steps_per_second": 3.34, + "step": 4096 + }, + { + "epoch": 0.5, + "learning_rate": 4.496409953772008e-05, + "loss": 0.2512, + "step": 5120 + }, + { + "epoch": 0.5, + "eval_loss": 0.42285656929016113, + "eval_runtime": 32.3235, + "eval_samples_per_second": 106.331, + "eval_steps_per_second": 3.341, + "step": 5120 + }, + { + "epoch": 0.6, + "learning_rate": 4.3956919445264097e-05, + "loss": 0.2506, + "step": 6144 + }, + { + "epoch": 0.6, + "eval_loss": 0.4190705716609955, + "eval_runtime": 32.4437, + "eval_samples_per_second": 105.937, + "eval_steps_per_second": 3.329, + "step": 6144 + }, + { + "epoch": 0.71, + "learning_rate": 4.294973935280811e-05, + "loss": 0.2512, + "step": 7168 + }, + { + "epoch": 0.71, + "eval_loss": 0.4247213900089264, + "eval_runtime": 32.3289, + "eval_samples_per_second": 106.314, + "eval_steps_per_second": 3.341, + "step": 7168 + }, + { + "epoch": 0.81, + "learning_rate": 4.194255926035212e-05, + "loss": 0.2483, + "step": 8192 + }, + { + "epoch": 0.81, + "eval_loss": 0.4238651394844055, + "eval_runtime": 32.3862, + "eval_samples_per_second": 106.126, + "eval_steps_per_second": 3.335, + "step": 8192 + }, + { + "epoch": 0.91, + "learning_rate": 4.093537916789614e-05, + "loss": 0.2479, + "step": 9216 + }, + { + "epoch": 0.91, + "eval_loss": 0.4259129762649536, + "eval_runtime": 32.4627, + "eval_samples_per_second": 105.875, + "eval_steps_per_second": 3.327, + "step": 9216 + }, + { + "epoch": 1.01, + "learning_rate": 3.9928199075440155e-05, + "loss": 0.2498, + "step": 10240 + }, + { + "epoch": 1.01, + "eval_loss": 0.4262418746948242, + "eval_runtime": 32.3999, + "eval_samples_per_second": 106.081, + "eval_steps_per_second": 3.333, + "step": 10240 + }, + { + "epoch": 1.11, + "learning_rate": 3.8921018982984166e-05, + "loss": 0.2467, + "step": 11264 + }, + { + "epoch": 1.11, + "eval_loss": 0.4267333149909973, + "eval_runtime": 32.3308, + "eval_samples_per_second": 106.307, + "eval_steps_per_second": 3.34, + "step": 11264 + }, + { + "epoch": 1.21, + "learning_rate": 3.791482246483722e-05, + "loss": 0.2466, + "step": 12288 + }, + { + "epoch": 1.21, + "eval_loss": 0.4263165295124054, + "eval_runtime": 32.4348, + "eval_samples_per_second": 105.967, + "eval_steps_per_second": 3.33, + "step": 12288 + }, + { + "epoch": 1.31, + "learning_rate": 3.690764237238124e-05, + "loss": 0.2449, + "step": 13312 + }, + { + "epoch": 1.31, + "eval_loss": 0.42505738139152527, + "eval_runtime": 32.3762, + "eval_samples_per_second": 106.158, + "eval_steps_per_second": 3.336, + "step": 13312 + }, + { + "epoch": 1.41, + "learning_rate": 3.590144585423429e-05, + "loss": 0.2452, + "step": 14336 + }, + { + "epoch": 1.41, + "eval_loss": 0.42740598320961, + "eval_runtime": 32.44, + "eval_samples_per_second": 105.949, + "eval_steps_per_second": 3.329, + "step": 14336 + }, + { + "epoch": 1.51, + "learning_rate": 3.489426576177831e-05, + "loss": 0.2449, + "step": 15360 + }, + { + "epoch": 1.51, + "eval_loss": 0.42628249526023865, + "eval_runtime": 32.3172, + "eval_samples_per_second": 106.352, + "eval_steps_per_second": 3.342, + "step": 15360 + }, + { + "epoch": 1.61, + "learning_rate": 3.388708566932232e-05, + "loss": 0.2444, + "step": 16384 + }, + { + "epoch": 1.61, + "eval_loss": 0.42398524284362793, + "eval_runtime": 32.2909, + "eval_samples_per_second": 106.439, + "eval_steps_per_second": 3.345, + "step": 16384 + }, + { + "epoch": 1.71, + "learning_rate": 3.287990557686633e-05, + "loss": 0.2428, + "step": 17408 + }, + { + "epoch": 1.71, + "eval_loss": 0.42891454696655273, + "eval_runtime": 32.3773, + "eval_samples_per_second": 106.155, + "eval_steps_per_second": 3.336, + "step": 17408 + }, + { + "epoch": 1.81, + "learning_rate": 3.1873709058719384e-05, + "loss": 0.2425, + "step": 18432 + }, + { + "epoch": 1.81, + "eval_loss": 0.4228712022304535, + "eval_runtime": 32.4341, + "eval_samples_per_second": 105.969, + "eval_steps_per_second": 3.33, + "step": 18432 + }, + { + "epoch": 1.91, + "learning_rate": 3.08665289662634e-05, + "loss": 0.2424, + "step": 19456 + }, + { + "epoch": 1.91, + "eval_loss": 0.4291061758995056, + "eval_runtime": 32.3192, + "eval_samples_per_second": 106.345, + "eval_steps_per_second": 3.342, + "step": 19456 + }, + { + "epoch": 2.01, + "learning_rate": 2.985934887380742e-05, + "loss": 0.2422, + "step": 20480 + }, + { + "epoch": 2.01, + "eval_loss": 0.4246675968170166, + "eval_runtime": 32.2862, + "eval_samples_per_second": 106.454, + "eval_steps_per_second": 3.345, + "step": 20480 + }, + { + "epoch": 2.12, + "learning_rate": 2.8853152355660473e-05, + "loss": 0.2397, + "step": 21504 + }, + { + "epoch": 2.12, + "eval_loss": 0.42707231640815735, + "eval_runtime": 32.3373, + "eval_samples_per_second": 106.286, + "eval_steps_per_second": 3.34, + "step": 21504 + }, + { + "epoch": 2.22, + "learning_rate": 2.7846955837513527e-05, + "loss": 0.2397, + "step": 22528 + }, + { + "epoch": 2.22, + "eval_loss": 0.42262786626815796, + "eval_runtime": 32.3328, + "eval_samples_per_second": 106.301, + "eval_steps_per_second": 3.34, + "step": 22528 + }, + { + "epoch": 2.32, + "learning_rate": 2.6839775745057538e-05, + "loss": 0.2411, + "step": 23552 + }, + { + "epoch": 2.32, + "eval_loss": 0.42685696482658386, + "eval_runtime": 32.3962, + "eval_samples_per_second": 106.093, + "eval_steps_per_second": 3.334, + "step": 23552 + }, + { + "epoch": 2.42, + "learning_rate": 2.5832595652601556e-05, + "loss": 0.2408, + "step": 24576 + }, + { + "epoch": 2.42, + "eval_loss": 0.42877742648124695, + "eval_runtime": 32.3163, + "eval_samples_per_second": 106.355, + "eval_steps_per_second": 3.342, + "step": 24576 + }, + { + "epoch": 2.52, + "learning_rate": 2.482541556014557e-05, + "loss": 0.2392, + "step": 25600 + }, + { + "epoch": 2.52, + "eval_loss": 0.42227810621261597, + "eval_runtime": 32.369, + "eval_samples_per_second": 106.182, + "eval_steps_per_second": 3.337, + "step": 25600 + }, + { + "epoch": 2.62, + "learning_rate": 2.3819219041998624e-05, + "loss": 0.2391, + "step": 26624 + }, + { + "epoch": 2.62, + "eval_loss": 0.4296777546405792, + "eval_runtime": 32.4315, + "eval_samples_per_second": 105.977, + "eval_steps_per_second": 3.33, + "step": 26624 + }, + { + "epoch": 2.72, + "learning_rate": 2.2812038949542638e-05, + "loss": 0.2385, + "step": 27648 + }, + { + "epoch": 2.72, + "eval_loss": 0.4252742528915405, + "eval_runtime": 32.4362, + "eval_samples_per_second": 105.962, + "eval_steps_per_second": 3.33, + "step": 27648 + }, + { + "epoch": 2.82, + "learning_rate": 2.180584243139569e-05, + "loss": 0.2371, + "step": 28672 + }, + { + "epoch": 2.82, + "eval_loss": 0.42966845631599426, + "eval_runtime": 32.3834, + "eval_samples_per_second": 106.135, + "eval_steps_per_second": 3.335, + "step": 28672 + }, + { + "epoch": 2.92, + "learning_rate": 2.079866233893971e-05, + "loss": 0.2373, + "step": 29696 + }, + { + "epoch": 2.92, + "eval_loss": 0.4231690466403961, + "eval_runtime": 32.3708, + "eval_samples_per_second": 106.176, + "eval_steps_per_second": 3.336, + "step": 29696 + }, + { + "epoch": 3.02, + "learning_rate": 1.97934493951018e-05, + "loss": 0.2368, + "step": 30720 + }, + { + "epoch": 3.02, + "eval_loss": 0.42956846952438354, + "eval_runtime": 32.3442, + "eval_samples_per_second": 106.263, + "eval_steps_per_second": 3.339, + "step": 30720 + }, + { + "epoch": 3.12, + "learning_rate": 1.8786269302645816e-05, + "loss": 0.2355, + "step": 31744 + }, + { + "epoch": 3.12, + "eval_loss": 0.43274641036987305, + "eval_runtime": 32.3365, + "eval_samples_per_second": 106.289, + "eval_steps_per_second": 3.34, + "step": 31744 + }, + { + "epoch": 3.22, + "learning_rate": 1.777908921018983e-05, + "loss": 0.2354, + "step": 32768 + }, + { + "epoch": 3.22, + "eval_loss": 0.4304845929145813, + "eval_runtime": 32.2799, + "eval_samples_per_second": 106.475, + "eval_steps_per_second": 3.346, + "step": 32768 + }, + { + "epoch": 3.32, + "learning_rate": 1.6771909117733845e-05, + "loss": 0.2345, + "step": 33792 + }, + { + "epoch": 3.32, + "eval_loss": 0.4286292791366577, + "eval_runtime": 32.4389, + "eval_samples_per_second": 105.953, + "eval_steps_per_second": 3.329, + "step": 33792 + }, + { + "epoch": 3.42, + "learning_rate": 1.5765712599586898e-05, + "loss": 0.2355, + "step": 34816 + }, + { + "epoch": 3.42, + "eval_loss": 0.4350430965423584, + "eval_runtime": 32.3371, + "eval_samples_per_second": 106.287, + "eval_steps_per_second": 3.34, + "step": 34816 + }, + { + "epoch": 3.53, + "learning_rate": 1.4758532507130915e-05, + "loss": 0.2353, + "step": 35840 + }, + { + "epoch": 3.53, + "eval_loss": 0.4268806278705597, + "eval_runtime": 32.3956, + "eval_samples_per_second": 106.095, + "eval_steps_per_second": 3.334, + "step": 35840 + }, + { + "epoch": 3.63, + "learning_rate": 1.375233598898397e-05, + "loss": 0.2351, + "step": 36864 + }, + { + "epoch": 3.63, + "eval_loss": 0.43005427718162537, + "eval_runtime": 32.3262, + "eval_samples_per_second": 106.323, + "eval_steps_per_second": 3.341, + "step": 36864 + }, + { + "epoch": 3.73, + "learning_rate": 1.2745155896527982e-05, + "loss": 0.2336, + "step": 37888 + }, + { + "epoch": 3.73, + "eval_loss": 0.4301435649394989, + "eval_runtime": 32.4031, + "eval_samples_per_second": 106.07, + "eval_steps_per_second": 3.333, + "step": 37888 + }, + { + "epoch": 3.83, + "learning_rate": 1.1737975804071997e-05, + "loss": 0.2344, + "step": 38912 + }, + { + "epoch": 3.83, + "eval_loss": 0.43188127875328064, + "eval_runtime": 32.3893, + "eval_samples_per_second": 106.115, + "eval_steps_per_second": 3.334, + "step": 38912 + }, + { + "epoch": 3.93, + "learning_rate": 1.0730795711616013e-05, + "loss": 0.2339, + "step": 39936 + }, + { + "epoch": 3.93, + "eval_loss": 0.4304964244365692, + "eval_runtime": 32.4411, + "eval_samples_per_second": 105.946, + "eval_steps_per_second": 3.329, + "step": 39936 + }, + { + "epoch": 4.03, + "learning_rate": 9.724599193469066e-06, + "loss": 0.2326, + "step": 40960 + }, + { + "epoch": 4.03, + "eval_loss": 0.4298175573348999, + "eval_runtime": 32.3377, + "eval_samples_per_second": 106.285, + "eval_steps_per_second": 3.34, + "step": 40960 + } + ], + "max_steps": 50835, + "num_train_epochs": 5, + "total_flos": 3.427221683544392e+17, + "trial_name": null, + "trial_params": null +} diff --git a/manual_upload/checkpoint-40960/training_args.bin b/manual_upload/checkpoint-40960/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..fe0b86ed4a6ab1c86797fadcaa43d46ee74f7857 --- /dev/null +++ b/manual_upload/checkpoint-40960/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdf489f059ab44f9bc26200d314d3fc5954dabfb501b51ecc19cdc4d4be8a527 +size 3579 diff --git a/manual_upload/checkpoint-45056/config.json b/manual_upload/checkpoint-45056/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e5041639331370e15270cc287c7f7a8566c79f1a --- /dev/null +++ b/manual_upload/checkpoint-45056/config.json @@ -0,0 +1,33 @@ +{ + "_name_or_path": "JammyMachina/elec-gmusic-familized-model-13-12__17-35-53", + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_embd": 512, + "n_head": 8, + "n_inner": null, + "n_layer": 6, + "n_positions": 2048, + "pad_token_id": 1, + "reorder_and_upcast_attn": false, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": false, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.1, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "torch_dtype": "float32", + "transformers_version": "4.26.0.dev0", + "use_cache": true, + "vocab_size": 301 +} diff --git a/manual_upload/checkpoint-45056/optimizer.pt b/manual_upload/checkpoint-45056/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ac0185ed676c0508177fdcc061a53c65d9a82ff4 --- /dev/null +++ b/manual_upload/checkpoint-45056/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdb25a88b4f868be37a211bbfe71588d79f1c927d69f5d3f11133e0cbe67517d +size 160988613 diff --git a/manual_upload/checkpoint-45056/pytorch_model.bin b/manual_upload/checkpoint-45056/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..6b3efa923c6a3b21027fcf5d3040a46727b17639 --- /dev/null +++ b/manual_upload/checkpoint-45056/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb2a69e8041c07b0e1b540e5b51b1b044c092342f9fd2232de400c7e53171958 +size 105666297 diff --git a/manual_upload/checkpoint-45056/rng_state.pth b/manual_upload/checkpoint-45056/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f4da228d93de477b18d9142bea7b7d0a1dbd1e4a --- /dev/null +++ b/manual_upload/checkpoint-45056/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:607ec2b514f7c4db26f95270a92ea283778558468bedb1530ef4abddb3181280 +size 17641 diff --git a/manual_upload/checkpoint-45056/scaler.pt b/manual_upload/checkpoint-45056/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..47dbde858c63f5d47e3e8e58eaf7c7cbbe27f623 --- /dev/null +++ b/manual_upload/checkpoint-45056/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73ee5373288a1b404edc3bf8f1daf186df8ebf4b253d245d2ab25ee604c2c6d8 +size 557 diff --git a/manual_upload/checkpoint-45056/scheduler.pt b/manual_upload/checkpoint-45056/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6fdef378d98e02e7d3a6a4040b9070d3492ce930 --- /dev/null +++ b/manual_upload/checkpoint-45056/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82f4bc96987f4affa8d95c834faa24ef6b88121c6d49156ae5db03f559ceb3af +size 627 diff --git a/manual_upload/checkpoint-45056/trainer_state.json b/manual_upload/checkpoint-45056/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..cd3439c9570ef5e92b6784707eeff0d5690255e2 --- /dev/null +++ b/manual_upload/checkpoint-45056/trainer_state.json @@ -0,0 +1,632 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.431592406806335, + "global_step": 45056, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.1, + "learning_rate": 4.899281990754402e-05, + "loss": 0.2509, + "step": 1024 + }, + { + "epoch": 0.1, + "eval_loss": 0.42676058411598206, + "eval_runtime": 32.3598, + "eval_samples_per_second": 106.212, + "eval_steps_per_second": 3.337, + "step": 1024 + }, + { + "epoch": 0.2, + "learning_rate": 4.798563981508803e-05, + "loss": 0.2521, + "step": 2048 + }, + { + "epoch": 0.2, + "eval_loss": 0.4283539652824402, + "eval_runtime": 32.4193, + "eval_samples_per_second": 106.017, + "eval_steps_per_second": 3.331, + "step": 2048 + }, + { + "epoch": 0.3, + "learning_rate": 4.697845972263204e-05, + "loss": 0.2533, + "step": 3072 + }, + { + "epoch": 0.3, + "eval_loss": 0.42187923192977905, + "eval_runtime": 32.3798, + "eval_samples_per_second": 106.146, + "eval_steps_per_second": 3.335, + "step": 3072 + }, + { + "epoch": 0.4, + "learning_rate": 4.597127963017606e-05, + "loss": 0.2517, + "step": 4096 + }, + { + "epoch": 0.4, + "eval_loss": 0.42451202869415283, + "eval_runtime": 32.3385, + "eval_samples_per_second": 106.282, + "eval_steps_per_second": 3.34, + "step": 4096 + }, + { + "epoch": 0.5, + "learning_rate": 4.496409953772008e-05, + "loss": 0.2512, + "step": 5120 + }, + { + "epoch": 0.5, + "eval_loss": 0.42285656929016113, + "eval_runtime": 32.3235, + "eval_samples_per_second": 106.331, + "eval_steps_per_second": 3.341, + "step": 5120 + }, + { + "epoch": 0.6, + "learning_rate": 4.3956919445264097e-05, + "loss": 0.2506, + "step": 6144 + }, + { + "epoch": 0.6, + "eval_loss": 0.4190705716609955, + "eval_runtime": 32.4437, + "eval_samples_per_second": 105.937, + "eval_steps_per_second": 3.329, + "step": 6144 + }, + { + "epoch": 0.71, + "learning_rate": 4.294973935280811e-05, + "loss": 0.2512, + "step": 7168 + }, + { + "epoch": 0.71, + "eval_loss": 0.4247213900089264, + "eval_runtime": 32.3289, + "eval_samples_per_second": 106.314, + "eval_steps_per_second": 3.341, + "step": 7168 + }, + { + "epoch": 0.81, + "learning_rate": 4.194255926035212e-05, + "loss": 0.2483, + "step": 8192 + }, + { + "epoch": 0.81, + "eval_loss": 0.4238651394844055, + "eval_runtime": 32.3862, + "eval_samples_per_second": 106.126, + "eval_steps_per_second": 3.335, + "step": 8192 + }, + { + "epoch": 0.91, + "learning_rate": 4.093537916789614e-05, + "loss": 0.2479, + "step": 9216 + }, + { + "epoch": 0.91, + "eval_loss": 0.4259129762649536, + "eval_runtime": 32.4627, + "eval_samples_per_second": 105.875, + "eval_steps_per_second": 3.327, + "step": 9216 + }, + { + "epoch": 1.01, + "learning_rate": 3.9928199075440155e-05, + "loss": 0.2498, + "step": 10240 + }, + { + "epoch": 1.01, + "eval_loss": 0.4262418746948242, + "eval_runtime": 32.3999, + "eval_samples_per_second": 106.081, + "eval_steps_per_second": 3.333, + "step": 10240 + }, + { + "epoch": 1.11, + "learning_rate": 3.8921018982984166e-05, + "loss": 0.2467, + "step": 11264 + }, + { + "epoch": 1.11, + "eval_loss": 0.4267333149909973, + "eval_runtime": 32.3308, + "eval_samples_per_second": 106.307, + "eval_steps_per_second": 3.34, + "step": 11264 + }, + { + "epoch": 1.21, + "learning_rate": 3.791482246483722e-05, + "loss": 0.2466, + "step": 12288 + }, + { + "epoch": 1.21, + "eval_loss": 0.4263165295124054, + "eval_runtime": 32.4348, + "eval_samples_per_second": 105.967, + "eval_steps_per_second": 3.33, + "step": 12288 + }, + { + "epoch": 1.31, + "learning_rate": 3.690764237238124e-05, + "loss": 0.2449, + "step": 13312 + }, + { + "epoch": 1.31, + "eval_loss": 0.42505738139152527, + "eval_runtime": 32.3762, + "eval_samples_per_second": 106.158, + "eval_steps_per_second": 3.336, + "step": 13312 + }, + { + "epoch": 1.41, + "learning_rate": 3.590144585423429e-05, + "loss": 0.2452, + "step": 14336 + }, + { + "epoch": 1.41, + "eval_loss": 0.42740598320961, + "eval_runtime": 32.44, + "eval_samples_per_second": 105.949, + "eval_steps_per_second": 3.329, + "step": 14336 + }, + { + "epoch": 1.51, + "learning_rate": 3.489426576177831e-05, + "loss": 0.2449, + "step": 15360 + }, + { + "epoch": 1.51, + "eval_loss": 0.42628249526023865, + "eval_runtime": 32.3172, + "eval_samples_per_second": 106.352, + "eval_steps_per_second": 3.342, + "step": 15360 + }, + { + "epoch": 1.61, + "learning_rate": 3.388708566932232e-05, + "loss": 0.2444, + "step": 16384 + }, + { + "epoch": 1.61, + "eval_loss": 0.42398524284362793, + "eval_runtime": 32.2909, + "eval_samples_per_second": 106.439, + "eval_steps_per_second": 3.345, + "step": 16384 + }, + { + "epoch": 1.71, + "learning_rate": 3.287990557686633e-05, + "loss": 0.2428, + "step": 17408 + }, + { + "epoch": 1.71, + "eval_loss": 0.42891454696655273, + "eval_runtime": 32.3773, + "eval_samples_per_second": 106.155, + "eval_steps_per_second": 3.336, + "step": 17408 + }, + { + "epoch": 1.81, + "learning_rate": 3.1873709058719384e-05, + "loss": 0.2425, + "step": 18432 + }, + { + "epoch": 1.81, + "eval_loss": 0.4228712022304535, + "eval_runtime": 32.4341, + "eval_samples_per_second": 105.969, + "eval_steps_per_second": 3.33, + "step": 18432 + }, + { + "epoch": 1.91, + "learning_rate": 3.08665289662634e-05, + "loss": 0.2424, + "step": 19456 + }, + { + "epoch": 1.91, + "eval_loss": 0.4291061758995056, + "eval_runtime": 32.3192, + "eval_samples_per_second": 106.345, + "eval_steps_per_second": 3.342, + "step": 19456 + }, + { + "epoch": 2.01, + "learning_rate": 2.985934887380742e-05, + "loss": 0.2422, + "step": 20480 + }, + { + "epoch": 2.01, + "eval_loss": 0.4246675968170166, + "eval_runtime": 32.2862, + "eval_samples_per_second": 106.454, + "eval_steps_per_second": 3.345, + "step": 20480 + }, + { + "epoch": 2.12, + "learning_rate": 2.8853152355660473e-05, + "loss": 0.2397, + "step": 21504 + }, + { + "epoch": 2.12, + "eval_loss": 0.42707231640815735, + "eval_runtime": 32.3373, + "eval_samples_per_second": 106.286, + "eval_steps_per_second": 3.34, + "step": 21504 + }, + { + "epoch": 2.22, + "learning_rate": 2.7846955837513527e-05, + "loss": 0.2397, + "step": 22528 + }, + { + "epoch": 2.22, + "eval_loss": 0.42262786626815796, + "eval_runtime": 32.3328, + "eval_samples_per_second": 106.301, + "eval_steps_per_second": 3.34, + "step": 22528 + }, + { + "epoch": 2.32, + "learning_rate": 2.6839775745057538e-05, + "loss": 0.2411, + "step": 23552 + }, + { + "epoch": 2.32, + "eval_loss": 0.42685696482658386, + "eval_runtime": 32.3962, + "eval_samples_per_second": 106.093, + "eval_steps_per_second": 3.334, + "step": 23552 + }, + { + "epoch": 2.42, + "learning_rate": 2.5832595652601556e-05, + "loss": 0.2408, + "step": 24576 + }, + { + "epoch": 2.42, + "eval_loss": 0.42877742648124695, + "eval_runtime": 32.3163, + "eval_samples_per_second": 106.355, + "eval_steps_per_second": 3.342, + "step": 24576 + }, + { + "epoch": 2.52, + "learning_rate": 2.482541556014557e-05, + "loss": 0.2392, + "step": 25600 + }, + { + "epoch": 2.52, + "eval_loss": 0.42227810621261597, + "eval_runtime": 32.369, + "eval_samples_per_second": 106.182, + "eval_steps_per_second": 3.337, + "step": 25600 + }, + { + "epoch": 2.62, + "learning_rate": 2.3819219041998624e-05, + "loss": 0.2391, + "step": 26624 + }, + { + "epoch": 2.62, + "eval_loss": 0.4296777546405792, + "eval_runtime": 32.4315, + "eval_samples_per_second": 105.977, + "eval_steps_per_second": 3.33, + "step": 26624 + }, + { + "epoch": 2.72, + "learning_rate": 2.2812038949542638e-05, + "loss": 0.2385, + "step": 27648 + }, + { + "epoch": 2.72, + "eval_loss": 0.4252742528915405, + "eval_runtime": 32.4362, + "eval_samples_per_second": 105.962, + "eval_steps_per_second": 3.33, + "step": 27648 + }, + { + "epoch": 2.82, + "learning_rate": 2.180584243139569e-05, + "loss": 0.2371, + "step": 28672 + }, + { + "epoch": 2.82, + "eval_loss": 0.42966845631599426, + "eval_runtime": 32.3834, + "eval_samples_per_second": 106.135, + "eval_steps_per_second": 3.335, + "step": 28672 + }, + { + "epoch": 2.92, + "learning_rate": 2.079866233893971e-05, + "loss": 0.2373, + "step": 29696 + }, + { + "epoch": 2.92, + "eval_loss": 0.4231690466403961, + "eval_runtime": 32.3708, + "eval_samples_per_second": 106.176, + "eval_steps_per_second": 3.336, + "step": 29696 + }, + { + "epoch": 3.02, + "learning_rate": 1.97934493951018e-05, + "loss": 0.2368, + "step": 30720 + }, + { + "epoch": 3.02, + "eval_loss": 0.42956846952438354, + "eval_runtime": 32.3442, + "eval_samples_per_second": 106.263, + "eval_steps_per_second": 3.339, + "step": 30720 + }, + { + "epoch": 3.12, + "learning_rate": 1.8786269302645816e-05, + "loss": 0.2355, + "step": 31744 + }, + { + "epoch": 3.12, + "eval_loss": 0.43274641036987305, + "eval_runtime": 32.3365, + "eval_samples_per_second": 106.289, + "eval_steps_per_second": 3.34, + "step": 31744 + }, + { + "epoch": 3.22, + "learning_rate": 1.777908921018983e-05, + "loss": 0.2354, + "step": 32768 + }, + { + "epoch": 3.22, + "eval_loss": 0.4304845929145813, + "eval_runtime": 32.2799, + "eval_samples_per_second": 106.475, + "eval_steps_per_second": 3.346, + "step": 32768 + }, + { + "epoch": 3.32, + "learning_rate": 1.6771909117733845e-05, + "loss": 0.2345, + "step": 33792 + }, + { + "epoch": 3.32, + "eval_loss": 0.4286292791366577, + "eval_runtime": 32.4389, + "eval_samples_per_second": 105.953, + "eval_steps_per_second": 3.329, + "step": 33792 + }, + { + "epoch": 3.42, + "learning_rate": 1.5765712599586898e-05, + "loss": 0.2355, + "step": 34816 + }, + { + "epoch": 3.42, + "eval_loss": 0.4350430965423584, + "eval_runtime": 32.3371, + "eval_samples_per_second": 106.287, + "eval_steps_per_second": 3.34, + "step": 34816 + }, + { + "epoch": 3.53, + "learning_rate": 1.4758532507130915e-05, + "loss": 0.2353, + "step": 35840 + }, + { + "epoch": 3.53, + "eval_loss": 0.4268806278705597, + "eval_runtime": 32.3956, + "eval_samples_per_second": 106.095, + "eval_steps_per_second": 3.334, + "step": 35840 + }, + { + "epoch": 3.63, + "learning_rate": 1.375233598898397e-05, + "loss": 0.2351, + "step": 36864 + }, + { + "epoch": 3.63, + "eval_loss": 0.43005427718162537, + "eval_runtime": 32.3262, + "eval_samples_per_second": 106.323, + "eval_steps_per_second": 3.341, + "step": 36864 + }, + { + "epoch": 3.73, + "learning_rate": 1.2745155896527982e-05, + "loss": 0.2336, + "step": 37888 + }, + { + "epoch": 3.73, + "eval_loss": 0.4301435649394989, + "eval_runtime": 32.4031, + "eval_samples_per_second": 106.07, + "eval_steps_per_second": 3.333, + "step": 37888 + }, + { + "epoch": 3.83, + "learning_rate": 1.1737975804071997e-05, + "loss": 0.2344, + "step": 38912 + }, + { + "epoch": 3.83, + "eval_loss": 0.43188127875328064, + "eval_runtime": 32.3893, + "eval_samples_per_second": 106.115, + "eval_steps_per_second": 3.334, + "step": 38912 + }, + { + "epoch": 3.93, + "learning_rate": 1.0730795711616013e-05, + "loss": 0.2339, + "step": 39936 + }, + { + "epoch": 3.93, + "eval_loss": 0.4304964244365692, + "eval_runtime": 32.4411, + "eval_samples_per_second": 105.946, + "eval_steps_per_second": 3.329, + "step": 39936 + }, + { + "epoch": 4.03, + "learning_rate": 9.724599193469066e-06, + "loss": 0.2326, + "step": 40960 + }, + { + "epoch": 4.03, + "eval_loss": 0.4298175573348999, + "eval_runtime": 32.3377, + "eval_samples_per_second": 106.285, + "eval_steps_per_second": 3.34, + "step": 40960 + }, + { + "epoch": 4.13, + "learning_rate": 8.718402675322121e-06, + "loss": 0.2316, + "step": 41984 + }, + { + "epoch": 4.13, + "eval_loss": 0.43077352643013, + "eval_runtime": 32.3835, + "eval_samples_per_second": 106.134, + "eval_steps_per_second": 3.335, + "step": 41984 + }, + { + "epoch": 4.23, + "learning_rate": 7.711222582866136e-06, + "loss": 0.2311, + "step": 43008 + }, + { + "epoch": 4.23, + "eval_loss": 0.43301910161972046, + "eval_runtime": 32.3178, + "eval_samples_per_second": 106.35, + "eval_steps_per_second": 3.342, + "step": 43008 + }, + { + "epoch": 4.33, + "learning_rate": 6.704042490410151e-06, + "loss": 0.2315, + "step": 44032 + }, + { + "epoch": 4.33, + "eval_loss": 0.4313049912452698, + "eval_runtime": 32.3292, + "eval_samples_per_second": 106.313, + "eval_steps_per_second": 3.341, + "step": 44032 + }, + { + "epoch": 4.43, + "learning_rate": 5.697845972263205e-06, + "loss": 0.2305, + "step": 45056 + }, + { + "epoch": 4.43, + "eval_loss": 0.43192604184150696, + "eval_runtime": 32.2814, + "eval_samples_per_second": 106.47, + "eval_steps_per_second": 3.346, + "step": 45056 + } + ], + "max_steps": 50835, + "num_train_epochs": 5, + "total_flos": 3.7699559505965875e+17, + "trial_name": null, + "trial_params": null +} diff --git a/manual_upload/checkpoint-45056/training_args.bin b/manual_upload/checkpoint-45056/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..fe0b86ed4a6ab1c86797fadcaa43d46ee74f7857 --- /dev/null +++ b/manual_upload/checkpoint-45056/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdf489f059ab44f9bc26200d314d3fc5954dabfb501b51ecc19cdc4d4be8a527 +size 3579 diff --git a/manual_upload/checkpoint-49152/config.json b/manual_upload/checkpoint-49152/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e5041639331370e15270cc287c7f7a8566c79f1a --- /dev/null +++ b/manual_upload/checkpoint-49152/config.json @@ -0,0 +1,33 @@ +{ + "_name_or_path": "JammyMachina/elec-gmusic-familized-model-13-12__17-35-53", + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_embd": 512, + "n_head": 8, + "n_inner": null, + "n_layer": 6, + "n_positions": 2048, + "pad_token_id": 1, + "reorder_and_upcast_attn": false, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": false, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.1, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "torch_dtype": "float32", + "transformers_version": "4.26.0.dev0", + "use_cache": true, + "vocab_size": 301 +} diff --git a/manual_upload/checkpoint-49152/optimizer.pt b/manual_upload/checkpoint-49152/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9b46ae118cf6c578ae283a45f645957ba723160b --- /dev/null +++ b/manual_upload/checkpoint-49152/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b7e2a0740ea7af2c820feefc87f6bb5d5fb3ea9d38943e7d92bb41fe6d22336 +size 160988613 diff --git a/manual_upload/checkpoint-49152/pytorch_model.bin b/manual_upload/checkpoint-49152/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..55a4c0c90fc9deaf0afc8d9b95205377be4e5737 --- /dev/null +++ b/manual_upload/checkpoint-49152/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0d6da6afb62de8c2fa4fdfb2e9a3ca92e20dcd5098fa2cd9b8378c3874a8dda +size 105666297 diff --git a/manual_upload/checkpoint-49152/rng_state.pth b/manual_upload/checkpoint-49152/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..97dcb829bf73711c32095392ff6e73e5b9c5fa03 --- /dev/null +++ b/manual_upload/checkpoint-49152/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73950719406f404be311f8a4ce16f99b065695c64745fd75c4afbd36d6abc6a1 +size 17641 diff --git a/manual_upload/checkpoint-49152/scaler.pt b/manual_upload/checkpoint-49152/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b623a27cc0abcaf0f583abe318ce3d640ad96c74 --- /dev/null +++ b/manual_upload/checkpoint-49152/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e38ca102a5d144e05116c24a90abd23d8a0607bdf7add251bbd55f4d724fdd7 +size 557 diff --git a/manual_upload/checkpoint-49152/scheduler.pt b/manual_upload/checkpoint-49152/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5a67c32aebada9b695948ada21fc28e03b54f43d --- /dev/null +++ b/manual_upload/checkpoint-49152/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a24782e94a439d25d01c08ba8e2f88a577cf001123dd31e97300ec78f3c46fa0 +size 627 diff --git a/manual_upload/checkpoint-49152/trainer_state.json b/manual_upload/checkpoint-49152/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..cacb9f427bf0360c20c57b423d049f06e7d509c2 --- /dev/null +++ b/manual_upload/checkpoint-49152/trainer_state.json @@ -0,0 +1,688 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.834464443788728, + "global_step": 49152, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.1, + "learning_rate": 4.899281990754402e-05, + "loss": 0.2509, + "step": 1024 + }, + { + "epoch": 0.1, + "eval_loss": 0.42676058411598206, + "eval_runtime": 32.3598, + "eval_samples_per_second": 106.212, + "eval_steps_per_second": 3.337, + "step": 1024 + }, + { + "epoch": 0.2, + "learning_rate": 4.798563981508803e-05, + "loss": 0.2521, + "step": 2048 + }, + { + "epoch": 0.2, + "eval_loss": 0.4283539652824402, + "eval_runtime": 32.4193, + "eval_samples_per_second": 106.017, + "eval_steps_per_second": 3.331, + "step": 2048 + }, + { + "epoch": 0.3, + "learning_rate": 4.697845972263204e-05, + "loss": 0.2533, + "step": 3072 + }, + { + "epoch": 0.3, + "eval_loss": 0.42187923192977905, + "eval_runtime": 32.3798, + "eval_samples_per_second": 106.146, + "eval_steps_per_second": 3.335, + "step": 3072 + }, + { + "epoch": 0.4, + "learning_rate": 4.597127963017606e-05, + "loss": 0.2517, + "step": 4096 + }, + { + "epoch": 0.4, + "eval_loss": 0.42451202869415283, + "eval_runtime": 32.3385, + "eval_samples_per_second": 106.282, + "eval_steps_per_second": 3.34, + "step": 4096 + }, + { + "epoch": 0.5, + "learning_rate": 4.496409953772008e-05, + "loss": 0.2512, + "step": 5120 + }, + { + "epoch": 0.5, + "eval_loss": 0.42285656929016113, + "eval_runtime": 32.3235, + "eval_samples_per_second": 106.331, + "eval_steps_per_second": 3.341, + "step": 5120 + }, + { + "epoch": 0.6, + "learning_rate": 4.3956919445264097e-05, + "loss": 0.2506, + "step": 6144 + }, + { + "epoch": 0.6, + "eval_loss": 0.4190705716609955, + "eval_runtime": 32.4437, + "eval_samples_per_second": 105.937, + "eval_steps_per_second": 3.329, + "step": 6144 + }, + { + "epoch": 0.71, + "learning_rate": 4.294973935280811e-05, + "loss": 0.2512, + "step": 7168 + }, + { + "epoch": 0.71, + "eval_loss": 0.4247213900089264, + "eval_runtime": 32.3289, + "eval_samples_per_second": 106.314, + "eval_steps_per_second": 3.341, + "step": 7168 + }, + { + "epoch": 0.81, + "learning_rate": 4.194255926035212e-05, + "loss": 0.2483, + "step": 8192 + }, + { + "epoch": 0.81, + "eval_loss": 0.4238651394844055, + "eval_runtime": 32.3862, + "eval_samples_per_second": 106.126, + "eval_steps_per_second": 3.335, + "step": 8192 + }, + { + "epoch": 0.91, + "learning_rate": 4.093537916789614e-05, + "loss": 0.2479, + "step": 9216 + }, + { + "epoch": 0.91, + "eval_loss": 0.4259129762649536, + "eval_runtime": 32.4627, + "eval_samples_per_second": 105.875, + "eval_steps_per_second": 3.327, + "step": 9216 + }, + { + "epoch": 1.01, + "learning_rate": 3.9928199075440155e-05, + "loss": 0.2498, + "step": 10240 + }, + { + "epoch": 1.01, + "eval_loss": 0.4262418746948242, + "eval_runtime": 32.3999, + "eval_samples_per_second": 106.081, + "eval_steps_per_second": 3.333, + "step": 10240 + }, + { + "epoch": 1.11, + "learning_rate": 3.8921018982984166e-05, + "loss": 0.2467, + "step": 11264 + }, + { + "epoch": 1.11, + "eval_loss": 0.4267333149909973, + "eval_runtime": 32.3308, + "eval_samples_per_second": 106.307, + "eval_steps_per_second": 3.34, + "step": 11264 + }, + { + "epoch": 1.21, + "learning_rate": 3.791482246483722e-05, + "loss": 0.2466, + "step": 12288 + }, + { + "epoch": 1.21, + "eval_loss": 0.4263165295124054, + "eval_runtime": 32.4348, + "eval_samples_per_second": 105.967, + "eval_steps_per_second": 3.33, + "step": 12288 + }, + { + "epoch": 1.31, + "learning_rate": 3.690764237238124e-05, + "loss": 0.2449, + "step": 13312 + }, + { + "epoch": 1.31, + "eval_loss": 0.42505738139152527, + "eval_runtime": 32.3762, + "eval_samples_per_second": 106.158, + "eval_steps_per_second": 3.336, + "step": 13312 + }, + { + "epoch": 1.41, + "learning_rate": 3.590144585423429e-05, + "loss": 0.2452, + "step": 14336 + }, + { + "epoch": 1.41, + "eval_loss": 0.42740598320961, + "eval_runtime": 32.44, + "eval_samples_per_second": 105.949, + "eval_steps_per_second": 3.329, + "step": 14336 + }, + { + "epoch": 1.51, + "learning_rate": 3.489426576177831e-05, + "loss": 0.2449, + "step": 15360 + }, + { + "epoch": 1.51, + "eval_loss": 0.42628249526023865, + "eval_runtime": 32.3172, + "eval_samples_per_second": 106.352, + "eval_steps_per_second": 3.342, + "step": 15360 + }, + { + "epoch": 1.61, + "learning_rate": 3.388708566932232e-05, + "loss": 0.2444, + "step": 16384 + }, + { + "epoch": 1.61, + "eval_loss": 0.42398524284362793, + "eval_runtime": 32.2909, + "eval_samples_per_second": 106.439, + "eval_steps_per_second": 3.345, + "step": 16384 + }, + { + "epoch": 1.71, + "learning_rate": 3.287990557686633e-05, + "loss": 0.2428, + "step": 17408 + }, + { + "epoch": 1.71, + "eval_loss": 0.42891454696655273, + "eval_runtime": 32.3773, + "eval_samples_per_second": 106.155, + "eval_steps_per_second": 3.336, + "step": 17408 + }, + { + "epoch": 1.81, + "learning_rate": 3.1873709058719384e-05, + "loss": 0.2425, + "step": 18432 + }, + { + "epoch": 1.81, + "eval_loss": 0.4228712022304535, + "eval_runtime": 32.4341, + "eval_samples_per_second": 105.969, + "eval_steps_per_second": 3.33, + "step": 18432 + }, + { + "epoch": 1.91, + "learning_rate": 3.08665289662634e-05, + "loss": 0.2424, + "step": 19456 + }, + { + "epoch": 1.91, + "eval_loss": 0.4291061758995056, + "eval_runtime": 32.3192, + "eval_samples_per_second": 106.345, + "eval_steps_per_second": 3.342, + "step": 19456 + }, + { + "epoch": 2.01, + "learning_rate": 2.985934887380742e-05, + "loss": 0.2422, + "step": 20480 + }, + { + "epoch": 2.01, + "eval_loss": 0.4246675968170166, + "eval_runtime": 32.2862, + "eval_samples_per_second": 106.454, + "eval_steps_per_second": 3.345, + "step": 20480 + }, + { + "epoch": 2.12, + "learning_rate": 2.8853152355660473e-05, + "loss": 0.2397, + "step": 21504 + }, + { + "epoch": 2.12, + "eval_loss": 0.42707231640815735, + "eval_runtime": 32.3373, + "eval_samples_per_second": 106.286, + "eval_steps_per_second": 3.34, + "step": 21504 + }, + { + "epoch": 2.22, + "learning_rate": 2.7846955837513527e-05, + "loss": 0.2397, + "step": 22528 + }, + { + "epoch": 2.22, + "eval_loss": 0.42262786626815796, + "eval_runtime": 32.3328, + "eval_samples_per_second": 106.301, + "eval_steps_per_second": 3.34, + "step": 22528 + }, + { + "epoch": 2.32, + "learning_rate": 2.6839775745057538e-05, + "loss": 0.2411, + "step": 23552 + }, + { + "epoch": 2.32, + "eval_loss": 0.42685696482658386, + "eval_runtime": 32.3962, + "eval_samples_per_second": 106.093, + "eval_steps_per_second": 3.334, + "step": 23552 + }, + { + "epoch": 2.42, + "learning_rate": 2.5832595652601556e-05, + "loss": 0.2408, + "step": 24576 + }, + { + "epoch": 2.42, + "eval_loss": 0.42877742648124695, + "eval_runtime": 32.3163, + "eval_samples_per_second": 106.355, + "eval_steps_per_second": 3.342, + "step": 24576 + }, + { + "epoch": 2.52, + "learning_rate": 2.482541556014557e-05, + "loss": 0.2392, + "step": 25600 + }, + { + "epoch": 2.52, + "eval_loss": 0.42227810621261597, + "eval_runtime": 32.369, + "eval_samples_per_second": 106.182, + "eval_steps_per_second": 3.337, + "step": 25600 + }, + { + "epoch": 2.62, + "learning_rate": 2.3819219041998624e-05, + "loss": 0.2391, + "step": 26624 + }, + { + "epoch": 2.62, + "eval_loss": 0.4296777546405792, + "eval_runtime": 32.4315, + "eval_samples_per_second": 105.977, + "eval_steps_per_second": 3.33, + "step": 26624 + }, + { + "epoch": 2.72, + "learning_rate": 2.2812038949542638e-05, + "loss": 0.2385, + "step": 27648 + }, + { + "epoch": 2.72, + "eval_loss": 0.4252742528915405, + "eval_runtime": 32.4362, + "eval_samples_per_second": 105.962, + "eval_steps_per_second": 3.33, + "step": 27648 + }, + { + "epoch": 2.82, + "learning_rate": 2.180584243139569e-05, + "loss": 0.2371, + "step": 28672 + }, + { + "epoch": 2.82, + "eval_loss": 0.42966845631599426, + "eval_runtime": 32.3834, + "eval_samples_per_second": 106.135, + "eval_steps_per_second": 3.335, + "step": 28672 + }, + { + "epoch": 2.92, + "learning_rate": 2.079866233893971e-05, + "loss": 0.2373, + "step": 29696 + }, + { + "epoch": 2.92, + "eval_loss": 0.4231690466403961, + "eval_runtime": 32.3708, + "eval_samples_per_second": 106.176, + "eval_steps_per_second": 3.336, + "step": 29696 + }, + { + "epoch": 3.02, + "learning_rate": 1.97934493951018e-05, + "loss": 0.2368, + "step": 30720 + }, + { + "epoch": 3.02, + "eval_loss": 0.42956846952438354, + "eval_runtime": 32.3442, + "eval_samples_per_second": 106.263, + "eval_steps_per_second": 3.339, + "step": 30720 + }, + { + "epoch": 3.12, + "learning_rate": 1.8786269302645816e-05, + "loss": 0.2355, + "step": 31744 + }, + { + "epoch": 3.12, + "eval_loss": 0.43274641036987305, + "eval_runtime": 32.3365, + "eval_samples_per_second": 106.289, + "eval_steps_per_second": 3.34, + "step": 31744 + }, + { + "epoch": 3.22, + "learning_rate": 1.777908921018983e-05, + "loss": 0.2354, + "step": 32768 + }, + { + "epoch": 3.22, + "eval_loss": 0.4304845929145813, + "eval_runtime": 32.2799, + "eval_samples_per_second": 106.475, + "eval_steps_per_second": 3.346, + "step": 32768 + }, + { + "epoch": 3.32, + "learning_rate": 1.6771909117733845e-05, + "loss": 0.2345, + "step": 33792 + }, + { + "epoch": 3.32, + "eval_loss": 0.4286292791366577, + "eval_runtime": 32.4389, + "eval_samples_per_second": 105.953, + "eval_steps_per_second": 3.329, + "step": 33792 + }, + { + "epoch": 3.42, + "learning_rate": 1.5765712599586898e-05, + "loss": 0.2355, + "step": 34816 + }, + { + "epoch": 3.42, + "eval_loss": 0.4350430965423584, + "eval_runtime": 32.3371, + "eval_samples_per_second": 106.287, + "eval_steps_per_second": 3.34, + "step": 34816 + }, + { + "epoch": 3.53, + "learning_rate": 1.4758532507130915e-05, + "loss": 0.2353, + "step": 35840 + }, + { + "epoch": 3.53, + "eval_loss": 0.4268806278705597, + "eval_runtime": 32.3956, + "eval_samples_per_second": 106.095, + "eval_steps_per_second": 3.334, + "step": 35840 + }, + { + "epoch": 3.63, + "learning_rate": 1.375233598898397e-05, + "loss": 0.2351, + "step": 36864 + }, + { + "epoch": 3.63, + "eval_loss": 0.43005427718162537, + "eval_runtime": 32.3262, + "eval_samples_per_second": 106.323, + "eval_steps_per_second": 3.341, + "step": 36864 + }, + { + "epoch": 3.73, + "learning_rate": 1.2745155896527982e-05, + "loss": 0.2336, + "step": 37888 + }, + { + "epoch": 3.73, + "eval_loss": 0.4301435649394989, + "eval_runtime": 32.4031, + "eval_samples_per_second": 106.07, + "eval_steps_per_second": 3.333, + "step": 37888 + }, + { + "epoch": 3.83, + "learning_rate": 1.1737975804071997e-05, + "loss": 0.2344, + "step": 38912 + }, + { + "epoch": 3.83, + "eval_loss": 0.43188127875328064, + "eval_runtime": 32.3893, + "eval_samples_per_second": 106.115, + "eval_steps_per_second": 3.334, + "step": 38912 + }, + { + "epoch": 3.93, + "learning_rate": 1.0730795711616013e-05, + "loss": 0.2339, + "step": 39936 + }, + { + "epoch": 3.93, + "eval_loss": 0.4304964244365692, + "eval_runtime": 32.4411, + "eval_samples_per_second": 105.946, + "eval_steps_per_second": 3.329, + "step": 39936 + }, + { + "epoch": 4.03, + "learning_rate": 9.724599193469066e-06, + "loss": 0.2326, + "step": 40960 + }, + { + "epoch": 4.03, + "eval_loss": 0.4298175573348999, + "eval_runtime": 32.3377, + "eval_samples_per_second": 106.285, + "eval_steps_per_second": 3.34, + "step": 40960 + }, + { + "epoch": 4.13, + "learning_rate": 8.718402675322121e-06, + "loss": 0.2316, + "step": 41984 + }, + { + "epoch": 4.13, + "eval_loss": 0.43077352643013, + "eval_runtime": 32.3835, + "eval_samples_per_second": 106.134, + "eval_steps_per_second": 3.335, + "step": 41984 + }, + { + "epoch": 4.23, + "learning_rate": 7.711222582866136e-06, + "loss": 0.2311, + "step": 43008 + }, + { + "epoch": 4.23, + "eval_loss": 0.43301910161972046, + "eval_runtime": 32.3178, + "eval_samples_per_second": 106.35, + "eval_steps_per_second": 3.342, + "step": 43008 + }, + { + "epoch": 4.33, + "learning_rate": 6.704042490410151e-06, + "loss": 0.2315, + "step": 44032 + }, + { + "epoch": 4.33, + "eval_loss": 0.4313049912452698, + "eval_runtime": 32.3292, + "eval_samples_per_second": 106.313, + "eval_steps_per_second": 3.341, + "step": 44032 + }, + { + "epoch": 4.43, + "learning_rate": 5.697845972263205e-06, + "loss": 0.2305, + "step": 45056 + }, + { + "epoch": 4.43, + "eval_loss": 0.43192604184150696, + "eval_runtime": 32.2814, + "eval_samples_per_second": 106.47, + "eval_steps_per_second": 3.346, + "step": 45056 + }, + { + "epoch": 4.53, + "learning_rate": 4.69066587980722e-06, + "loss": 0.2328, + "step": 46080 + }, + { + "epoch": 4.53, + "eval_loss": 0.42917123436927795, + "eval_runtime": 32.2788, + "eval_samples_per_second": 106.479, + "eval_steps_per_second": 3.346, + "step": 46080 + }, + { + "epoch": 4.63, + "learning_rate": 3.6834857873512347e-06, + "loss": 0.232, + "step": 47104 + }, + { + "epoch": 4.63, + "eval_loss": 0.4288509488105774, + "eval_runtime": 32.3263, + "eval_samples_per_second": 106.322, + "eval_steps_per_second": 3.341, + "step": 47104 + }, + { + "epoch": 4.73, + "learning_rate": 2.6763056948952493e-06, + "loss": 0.2309, + "step": 48128 + }, + { + "epoch": 4.73, + "eval_loss": 0.43027371168136597, + "eval_runtime": 32.3873, + "eval_samples_per_second": 106.122, + "eval_steps_per_second": 3.335, + "step": 48128 + }, + { + "epoch": 4.83, + "learning_rate": 1.6701091767483034e-06, + "loss": 0.23, + "step": 49152 + }, + { + "epoch": 4.83, + "eval_loss": 0.4316680133342743, + "eval_runtime": 32.3291, + "eval_samples_per_second": 106.313, + "eval_steps_per_second": 3.341, + "step": 49152 + } + ], + "max_steps": 50835, + "num_train_epochs": 5, + "total_flos": 4.1126902176487834e+17, + "trial_name": null, + "trial_params": null +} diff --git a/manual_upload/checkpoint-49152/training_args.bin b/manual_upload/checkpoint-49152/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..fe0b86ed4a6ab1c86797fadcaa43d46ee74f7857 --- /dev/null +++ b/manual_upload/checkpoint-49152/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdf489f059ab44f9bc26200d314d3fc5954dabfb501b51ecc19cdc4d4be8a527 +size 3579 diff --git a/manual_upload/config.json b/manual_upload/config.json index 389b6ccaa37533f8b02bbdb9b75931ac8d25ddee..e5041639331370e15270cc287c7f7a8566c79f1a 100644 --- a/manual_upload/config.json +++ b/manual_upload/config.json @@ -1,4 +1,5 @@ { + "_name_or_path": "JammyMachina/elec-gmusic-familized-model-13-12__17-35-53", "activation_function": "gelu_new", "architectures": [ "GPT2LMHeadModel" diff --git a/manual_upload/manual_upload/.gitattributes b/manual_upload/manual_upload/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..c7d9f3332a950355d5a77d85000f05e6f45435ea --- /dev/null +++ b/manual_upload/manual_upload/.gitattributes @@ -0,0 +1,34 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/manual_upload/manual_upload/.gitignore b/manual_upload/manual_upload/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..0348ea97130c017c407fcfb6fd4003859f17b84c --- /dev/null +++ b/manual_upload/manual_upload/.gitignore @@ -0,0 +1 @@ +checkpoint-*/ \ No newline at end of file diff --git a/manual_upload/manual_upload/config.json b/manual_upload/manual_upload/config.json new file mode 100644 index 0000000000000000000000000000000000000000..389b6ccaa37533f8b02bbdb9b75931ac8d25ddee --- /dev/null +++ b/manual_upload/manual_upload/config.json @@ -0,0 +1,32 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_embd": 512, + "n_head": 8, + "n_inner": null, + "n_layer": 6, + "n_positions": 2048, + "pad_token_id": 1, + "reorder_and_upcast_attn": false, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": false, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.1, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "torch_dtype": "float32", + "transformers_version": "4.26.0.dev0", + "use_cache": true, + "vocab_size": 301 +} diff --git a/manual_upload/manual_upload/pytorch_model.bin b/manual_upload/manual_upload/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..c2a3a443f1311801a0f6f1ddad21b41a06d2fdad --- /dev/null +++ b/manual_upload/manual_upload/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54af5d8875e2c2fd3cc37c56d33cad185fa27c7098ef23bdcb9ec77ecf847f0e +size 105666297 diff --git a/manual_upload/manual_upload/special_tokens_map.json b/manual_upload/manual_upload/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..bd76b2b2c813ed0fbec7b73fe1e0d837191c707a --- /dev/null +++ b/manual_upload/manual_upload/special_tokens_map.json @@ -0,0 +1,3 @@ +{ + "pad_token": "[PAD]" +} diff --git a/manual_upload/manual_upload/tokenizer.json b/manual_upload/manual_upload/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..ff75163d045b6fe18990b1c8e9fd328087e27233 --- /dev/null +++ b/manual_upload/manual_upload/tokenizer.json @@ -0,0 +1,347 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "[UNK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "[PAD]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "[MASK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "WhitespaceSplit" + }, + "post_processor": null, + "decoder": null, + "model": { + "type": "WordLevel", + "vocab": { + "[UNK]": 0, + "[PAD]": 1, + "[MASK]": 2, + "TIME_DELTA=1": 3, + "TIME_DELTA=2": 4, + "BAR_END": 5, + "BAR_START": 6, + "NOTE_OFF=42": 7, + "NOTE_ON=42": 8, + "NOTE_OFF=36": 9, + "NOTE_ON=36": 10, + "TIME_DELTA=4": 11, + "NOTE_OFF=38": 12, + "NOTE_ON=38": 13, + "NOTE_OFF=40": 14, + "NOTE_ON=40": 15, + "NOTE_OFF=35": 16, + "NOTE_ON=35": 17, + "TIME_DELTA=3": 18, + "NOTE_OFF=64": 19, + "NOTE_ON=64": 20, + "NOTE_OFF=57": 21, + "NOTE_ON=57": 22, + "NOTE_OFF=62": 23, + "NOTE_ON=62": 24, + "NOTE_OFF=54": 25, + "NOTE_ON=54": 26, + "NOTE_OFF=59": 27, + "NOTE_ON=59": 28, + "NOTE_OFF=55": 29, + "NOTE_ON=55": 30, + "NOTE_OFF=69": 31, + "NOTE_ON=69": 32, + "NOTE_OFF=60": 33, + "NOTE_ON=60": 34, + "NOTE_OFF=46": 35, + "NOTE_ON=46": 36, + "NOTE_OFF=44": 37, + "NOTE_ON=44": 38, + "NOTE_OFF=50": 39, + "NOTE_ON=50": 40, + "NOTE_OFF=67": 41, + "NOTE_ON=67": 42, + "NOTE_OFF=45": 43, + "NOTE_ON=45": 44, + "NOTE_OFF=52": 45, + "NOTE_ON=52": 46, + "NOTE_OFF=70": 47, + "NOTE_ON=70": 48, + "NOTE_OFF=51": 49, + "NOTE_ON=51": 50, + "TRACK_END": 51, + "TRACK_START": 52, + "NOTE_OFF=61": 53, + "NOTE_ON=61": 54, + "NOTE_OFF=66": 55, + "NOTE_ON=66": 56, + "NOTE_OFF=48": 57, + "NOTE_ON=48": 58, + "NOTE_OFF=65": 59, + "NOTE_ON=65": 60, + "NOTE_OFF=43": 61, + "NOTE_ON=43": 62, + "NOTE_OFF=63": 63, + "NOTE_ON=63": 64, + "NOTE_OFF=53": 65, + "NOTE_ON=53": 66, + "NOTE_OFF=47": 67, + "NOTE_ON=47": 68, + "NOTE_OFF=49": 69, + "NOTE_ON=49": 70, + "NOTE_OFF=58": 71, + "NOTE_ON=58": 72, + "NOTE_OFF=56": 73, + "NOTE_ON=56": 74, + "NOTE_OFF=39": 75, + "NOTE_ON=39": 76, + "NOTE_OFF=41": 77, + "NOTE_ON=41": 78, + "NOTE_OFF=71": 79, + "NOTE_ON=71": 80, + "TIME_DELTA=6": 81, + "NOTE_OFF=72": 82, + "NOTE_ON=72": 83, + "NOTE_OFF=33": 84, + "NOTE_ON=33": 85, + "NOTE_OFF=68": 86, + "NOTE_ON=68": 87, + "NOTE_OFF=74": 88, + "NOTE_ON=74": 89, + "TIME_DELTA=8": 90, + "TIME_DELTA=16": 91, + "NOTE_OFF=76": 92, + "NOTE_ON=76": 93, + "NOTE_OFF=82": 94, + "NOTE_ON=82": 95, + "NOTE_OFF=37": 96, + "NOTE_ON=37": 97, + "NOTE_OFF=31": 98, + "NOTE_ON=31": 99, + "NOTE_OFF=73": 100, + "NOTE_ON=73": 101, + "NOTE_OFF=28": 102, + "NOTE_ON=28": 103, + "NOTE_OFF=34": 104, + "NOTE_ON=34": 105, + "NOTE_OFF=75": 106, + "NOTE_ON=75": 107, + "TIME_DELTA=5": 108, + "NOTE_OFF=29": 109, + "NOTE_ON=29": 110, + "NOTE_OFF=32": 111, + "NOTE_ON=32": 112, + "NOTE_OFF=79": 113, + "NOTE_ON=79": 114, + "DENSITY=3": 115, + "NOTE_OFF=81": 116, + "NOTE_ON=81": 117, + "NOTE_OFF=77": 118, + "NOTE_ON=77": 119, + "NOTE_OFF=78": 120, + "NOTE_ON=78": 121, + "INST=3": 122, + "NOTE_OFF=30": 123, + "NOTE_ON=30": 124, + "DENSITY=2": 125, + "DENSITY=1": 126, + "TIME_DELTA=7": 127, + "DENSITY=0": 128, + "NOTE_OFF=26": 129, + "NOTE_ON=26": 130, + "INST=DRUMS": 131, + "NOTE_OFF=80": 132, + "NOTE_ON=80": 133, + "TIME_DELTA=12": 134, + "NOTE_OFF=27": 135, + "NOTE_ON=27": 136, + "PIECE_START": 137, + "TIME_DELTA=10": 138, + "NOTE_OFF=83": 139, + "NOTE_ON=83": 140, + "INST=4": 141, + "NOTE_OFF=84": 142, + "NOTE_ON=84": 143, + "NOTE_OFF=86": 144, + "NOTE_ON=86": 145, + "TIME_DELTA=14": 146, + "INST=0": 147, + "TIME_DELTA=15": 148, + "INST=6": 149, + "NOTE_OFF=85": 150, + "NOTE_ON=85": 151, + "NOTE_OFF=88": 152, + "NOTE_ON=88": 153, + "TIME_DELTA=9": 154, + "NOTE_OFF=24": 155, + "NOTE_ON=24": 156, + "NOTE_OFF=87": 157, + "NOTE_ON=87": 158, + "TIME_DELTA=11": 159, + "NOTE_OFF=91": 160, + "NOTE_ON=91": 161, + "INST=10": 162, + "TIME_DELTA=13": 163, + "NOTE_OFF=25": 164, + "NOTE_ON=25": 165, + "NOTE_OFF=89": 166, + "NOTE_ON=89": 167, + "INST=8": 168, + "NOTE_OFF=93": 169, + "NOTE_ON=93": 170, + "INST=7": 171, + "INST=11": 172, + "NOTE_OFF=90": 173, + "NOTE_ON=90": 174, + "NOTE_OFF=22": 175, + "NOTE_ON=22": 176, + "INST=2": 177, + "NOTE_OFF=23": 178, + "NOTE_ON=23": 179, + "NOTE_OFF=0": 180, + "NOTE_ON=0": 181, + "NOTE_OFF=94": 182, + "NOTE_ON=94": 183, + "NOTE_OFF=95": 184, + "NOTE_ON=95": 185, + "NOTE_OFF=96": 186, + "NOTE_ON=96": 187, + "INST=9": 188, + "INST=5": 189, + "INST=12": 190, + "NOTE_OFF=92": 191, + "NOTE_ON=92": 192, + "INST=1": 193, + "NOTE_OFF=98": 194, + "NOTE_ON=98": 195, + "INST=14": 196, + "NOTE_OFF=20": 197, + "NOTE_ON=20": 198, + "NOTE_OFF=100": 199, + "NOTE_ON=100": 200, + "INST=13": 201, + "INST=15": 202, + "NOTE_OFF=21": 203, + "NOTE_ON=21": 204, + "NOTE_OFF=99": 205, + "NOTE_ON=99": 206, + "NOTE_OFF=101": 207, + "NOTE_ON=101": 208, + "NOTE_OFF=16": 209, + "NOTE_ON=16": 210, + "NOTE_OFF=97": 211, + "NOTE_ON=97": 212, + "NOTE_OFF=102": 213, + "NOTE_ON=102": 214, + "NOTE_OFF=17": 215, + "NOTE_ON=17": 216, + "NOTE_OFF=19": 217, + "NOTE_ON=19": 218, + "NOTE_OFF=103": 219, + "NOTE_ON=103": 220, + "NOTE_OFF=107": 221, + "NOTE_ON=107": 222, + "NOTE_OFF=105": 223, + "NOTE_ON=105": 224, + "NOTE_OFF=18": 225, + "NOTE_ON=18": 226, + "NOTE_OFF=109": 227, + "NOTE_ON=109": 228, + "NOTE_OFF=110": 229, + "NOTE_ON=110": 230, + "NOTE_OFF=112": 231, + "NOTE_ON=112": 232, + "NOTE_OFF=15": 233, + "NOTE_ON=15": 234, + "NOTE_OFF=6": 235, + "NOTE_ON=6": 236, + "NOTE_OFF=108": 237, + "NOTE_ON=108": 238, + "NOTE_OFF=12": 239, + "NOTE_ON=12": 240, + "NOTE_OFF=126": 241, + "NOTE_ON=126": 242, + "NOTE_OFF=14": 243, + "NOTE_ON=14": 244, + "NOTE_OFF=104": 245, + "NOTE_ON=104": 246, + "NOTE_OFF=7": 247, + "NOTE_ON=7": 248, + "NOTE_OFF=8": 249, + "NOTE_ON=8": 250, + "NOTE_OFF=123": 251, + "NOTE_ON=123": 252, + "NOTE_OFF=106": 253, + "NOTE_ON=106": 254, + "NOTE_OFF=1": 255, + "NOTE_ON=1": 256, + "NOTE_OFF=122": 257, + "NOTE_ON=122": 258, + "NOTE_OFF=124": 259, + "NOTE_ON=124": 260, + "NOTE_OFF=127": 261, + "NOTE_ON=127": 262, + "NOTE_OFF=9": 263, + "NOTE_ON=9": 264, + "NOTE_OFF=125": 265, + "NOTE_ON=125": 266, + "NOTE_OFF=114": 267, + "NOTE_ON=114": 268, + "NOTE_OFF=117": 269, + "NOTE_ON=117": 270, + "NOTE_OFF=113": 271, + "NOTE_ON=113": 272, + "NOTE_OFF=119": 273, + "NOTE_ON=119": 274, + "NOTE_OFF=120": 275, + "NOTE_OFF=2": 276, + "NOTE_ON=120": 277, + "NOTE_ON=2": 278, + "NOTE_OFF=115": 279, + "NOTE_ON=115": 280, + "NOTE_OFF=111": 281, + "NOTE_ON=111": 282, + "NOTE_OFF=4": 283, + "NOTE_ON=4": 284, + "NOTE_OFF=116": 285, + "NOTE_ON=116": 286, + "NOTE_OFF=5": 287, + "NOTE_ON=5": 288, + "NOTE_OFF=3": 289, + "NOTE_ON=3": 290, + "NOTE_OFF=11": 291, + "NOTE_ON=11": 292, + "NOTE_OFF=10": 293, + "NOTE_OFF=118": 294, + "NOTE_OFF=121": 295, + "NOTE_ON=10": 296, + "NOTE_ON=118": 297, + "NOTE_ON=121": 298, + "NOTE_OFF=13": 299, + "NOTE_ON=13": 300 + }, + "unk_token": "[UNK]" + } +} \ No newline at end of file diff --git a/manual_upload/manual_upload/tokenizer_config.json b/manual_upload/manual_upload/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b97dea73b49d2458f7cad04e387ed90780d000e3 --- /dev/null +++ b/manual_upload/manual_upload/tokenizer_config.json @@ -0,0 +1,6 @@ +{ + "model_max_length": 1000000000000000019884624838656, + "name_or_path": "JammyMachina/elec-gmusic-familized-model-13-12__17-35-53", + "special_tokens_map_file": "/root/.cache/huggingface/hub/models--JammyMachina--elec-gmusic-familized-model-13-12__17-35-53/snapshots/fbba9d2ac598a2e0fbec338593aceff49347aff4/special_tokens_map.json", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/manual_upload/manual_upload/trainer_state.json b/manual_upload/manual_upload/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..829a6020707fb734f7ff712053844682623b1750 --- /dev/null +++ b/manual_upload/manual_upload/trainer_state.json @@ -0,0 +1,15 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": null, + "global_step": 0, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [], + "max_steps": 0, + "num_train_epochs": 0, + "total_flos": 0, + "trial_name": null, + "trial_params": null +} diff --git a/manual_upload/manual_upload/training_args.bin b/manual_upload/manual_upload/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3a1307a6fc7a9262833f981703264dc0bd753507 --- /dev/null +++ b/manual_upload/manual_upload/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0206a7605fed29c8321de91dfcf793fb6150f09dc4e519e1121bb6e0b17b29fc +size 3515 diff --git a/manual_upload/manual_upload/training_args.json b/manual_upload/manual_upload/training_args.json new file mode 100644 index 0000000000000000000000000000000000000000..f16990343e6c8bf86f1ed5fd8f642297ee873ac0 --- /dev/null +++ b/manual_upload/manual_upload/training_args.json @@ -0,0 +1,109 @@ +{ + "output_dir": "models/elec-gmusic-familized", + "overwrite_output_dir": true, + "do_train": false, + "do_eval": true, + "do_predict": false, + "evaluation_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 10, + "per_device_eval_batch_size": 8, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "learning_rate": 0.0005, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 6, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "warmup_ratio": 0.0, + "warmup_steps": 200, + "log_level": "passive", + "log_level_replica": "passive", + "log_on_each_node": true, + "logging_dir": "models/elec-gmusic-familized/logs", + "logging_strategy": "steps", + "logging_first_step": false, + "logging_steps": 4096, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 16384, + "save_total_limit": 5, + "save_on_each_node": false, + "no_cuda": false, + "use_mps_device": false, + "seed": 42, + "data_seed": null, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": false, + "fp16": true, + "fp16_opt_level": "O1", + "half_precision_backend": "cuda_amp", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": -1, + "xpu_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": [], + "dataloader_drop_last": false, + "eval_steps": 4096, + "dataloader_num_workers": 0, + "past_index": -1, + "run_name": "models/elec-gmusic-familized", + "disable_tqdm": false, + "remove_unused_columns": true, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": null, + "greater_is_better": null, + "ignore_data_skip": false, + "sharded_ddp": [], + "fsdp": [], + "fsdp_min_num_params": 0, + "fsdp_transformer_layer_cls_to_wrap": null, + "deepspeed": null, + "label_smoothing_factor": 0.0, + "optim": "adamw_hf", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "wandb" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "dataloader_pin_memory": true, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": true, + "resume_from_checkpoint": null, + "hub_model_id": "JammyMachina/elec-gmusic-familized-model-13-12__17-35-53", + "hub_strategy": "every_save", + "hub_token": "", + "hub_private_repo": false, + "gradient_checkpointing": false, + "include_inputs_for_metrics": false, + "fp16_backend": "auto", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": "", + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null +} \ No newline at end of file diff --git a/manual_upload/pytorch_model.bin b/manual_upload/pytorch_model.bin index c2a3a443f1311801a0f6f1ddad21b41a06d2fdad..43e6bd94ca909f5f482733a4b2d1a2049380e6c3 100644 --- a/manual_upload/pytorch_model.bin +++ b/manual_upload/pytorch_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:54af5d8875e2c2fd3cc37c56d33cad185fa27c7098ef23bdcb9ec77ecf847f0e +oid sha256:35fb753e7e3cdca765a1357f59bfef6a95b36d2feb92a5a07d8388f0cf0d33c4 size 105666297 diff --git a/manual_upload/tokenizer.json b/manual_upload/tokenizer.json index ff75163d045b6fe18990b1c8e9fd328087e27233..318b1885ed41e31bfa5c3fd6ba2148c5514875e8 100644 --- a/manual_upload/tokenizer.json +++ b/manual_upload/tokenizer.json @@ -1,7 +1,19 @@ { "version": "1.0", - "truncation": null, - "padding": null, + "truncation": { + "direction": "Right", + "max_length": 2048, + "strategy": "LongestFirst", + "stride": 0 + }, + "padding": { + "strategy": "BatchLongest", + "direction": "Right", + "pad_to_multiple_of": null, + "pad_id": 1, + "pad_type_id": 0, + "pad_token": "[PAD]" + }, "added_tokens": [ { "id": 0, diff --git a/manual_upload/trainer_state.json b/manual_upload/trainer_state.json index 829a6020707fb734f7ff712053844682623b1750..ca02d20fa4cddaaa4895f560bb70c96fd638f161 100644 --- a/manual_upload/trainer_state.json +++ b/manual_upload/trainer_state.json @@ -1,15 +1,711 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": null, - "global_step": 0, + "epoch": 5.0, + "global_step": 50835, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, - "log_history": [], - "max_steps": 0, - "num_train_epochs": 0, - "total_flos": 0, + "log_history": [ + { + "epoch": 0.1, + "learning_rate": 4.899281990754402e-05, + "loss": 0.2509, + "step": 1024 + }, + { + "epoch": 0.1, + "eval_loss": 0.42676058411598206, + "eval_runtime": 32.3598, + "eval_samples_per_second": 106.212, + "eval_steps_per_second": 3.337, + "step": 1024 + }, + { + "epoch": 0.2, + "learning_rate": 4.798563981508803e-05, + "loss": 0.2521, + "step": 2048 + }, + { + "epoch": 0.2, + "eval_loss": 0.4283539652824402, + "eval_runtime": 32.4193, + "eval_samples_per_second": 106.017, + "eval_steps_per_second": 3.331, + "step": 2048 + }, + { + "epoch": 0.3, + "learning_rate": 4.697845972263204e-05, + "loss": 0.2533, + "step": 3072 + }, + { + "epoch": 0.3, + "eval_loss": 0.42187923192977905, + "eval_runtime": 32.3798, + "eval_samples_per_second": 106.146, + "eval_steps_per_second": 3.335, + "step": 3072 + }, + { + "epoch": 0.4, + "learning_rate": 4.597127963017606e-05, + "loss": 0.2517, + "step": 4096 + }, + { + "epoch": 0.4, + "eval_loss": 0.42451202869415283, + "eval_runtime": 32.3385, + "eval_samples_per_second": 106.282, + "eval_steps_per_second": 3.34, + "step": 4096 + }, + { + "epoch": 0.5, + "learning_rate": 4.496409953772008e-05, + "loss": 0.2512, + "step": 5120 + }, + { + "epoch": 0.5, + "eval_loss": 0.42285656929016113, + "eval_runtime": 32.3235, + "eval_samples_per_second": 106.331, + "eval_steps_per_second": 3.341, + "step": 5120 + }, + { + "epoch": 0.6, + "learning_rate": 4.3956919445264097e-05, + "loss": 0.2506, + "step": 6144 + }, + { + "epoch": 0.6, + "eval_loss": 0.4190705716609955, + "eval_runtime": 32.4437, + "eval_samples_per_second": 105.937, + "eval_steps_per_second": 3.329, + "step": 6144 + }, + { + "epoch": 0.71, + "learning_rate": 4.294973935280811e-05, + "loss": 0.2512, + "step": 7168 + }, + { + "epoch": 0.71, + "eval_loss": 0.4247213900089264, + "eval_runtime": 32.3289, + "eval_samples_per_second": 106.314, + "eval_steps_per_second": 3.341, + "step": 7168 + }, + { + "epoch": 0.81, + "learning_rate": 4.194255926035212e-05, + "loss": 0.2483, + "step": 8192 + }, + { + "epoch": 0.81, + "eval_loss": 0.4238651394844055, + "eval_runtime": 32.3862, + "eval_samples_per_second": 106.126, + "eval_steps_per_second": 3.335, + "step": 8192 + }, + { + "epoch": 0.91, + "learning_rate": 4.093537916789614e-05, + "loss": 0.2479, + "step": 9216 + }, + { + "epoch": 0.91, + "eval_loss": 0.4259129762649536, + "eval_runtime": 32.4627, + "eval_samples_per_second": 105.875, + "eval_steps_per_second": 3.327, + "step": 9216 + }, + { + "epoch": 1.01, + "learning_rate": 3.9928199075440155e-05, + "loss": 0.2498, + "step": 10240 + }, + { + "epoch": 1.01, + "eval_loss": 0.4262418746948242, + "eval_runtime": 32.3999, + "eval_samples_per_second": 106.081, + "eval_steps_per_second": 3.333, + "step": 10240 + }, + { + "epoch": 1.11, + "learning_rate": 3.8921018982984166e-05, + "loss": 0.2467, + "step": 11264 + }, + { + "epoch": 1.11, + "eval_loss": 0.4267333149909973, + "eval_runtime": 32.3308, + "eval_samples_per_second": 106.307, + "eval_steps_per_second": 3.34, + "step": 11264 + }, + { + "epoch": 1.21, + "learning_rate": 3.791482246483722e-05, + "loss": 0.2466, + "step": 12288 + }, + { + "epoch": 1.21, + "eval_loss": 0.4263165295124054, + "eval_runtime": 32.4348, + "eval_samples_per_second": 105.967, + "eval_steps_per_second": 3.33, + "step": 12288 + }, + { + "epoch": 1.31, + "learning_rate": 3.690764237238124e-05, + "loss": 0.2449, + "step": 13312 + }, + { + "epoch": 1.31, + "eval_loss": 0.42505738139152527, + "eval_runtime": 32.3762, + "eval_samples_per_second": 106.158, + "eval_steps_per_second": 3.336, + "step": 13312 + }, + { + "epoch": 1.41, + "learning_rate": 3.590144585423429e-05, + "loss": 0.2452, + "step": 14336 + }, + { + "epoch": 1.41, + "eval_loss": 0.42740598320961, + "eval_runtime": 32.44, + "eval_samples_per_second": 105.949, + "eval_steps_per_second": 3.329, + "step": 14336 + }, + { + "epoch": 1.51, + "learning_rate": 3.489426576177831e-05, + "loss": 0.2449, + "step": 15360 + }, + { + "epoch": 1.51, + "eval_loss": 0.42628249526023865, + "eval_runtime": 32.3172, + "eval_samples_per_second": 106.352, + "eval_steps_per_second": 3.342, + "step": 15360 + }, + { + "epoch": 1.61, + "learning_rate": 3.388708566932232e-05, + "loss": 0.2444, + "step": 16384 + }, + { + "epoch": 1.61, + "eval_loss": 0.42398524284362793, + "eval_runtime": 32.2909, + "eval_samples_per_second": 106.439, + "eval_steps_per_second": 3.345, + "step": 16384 + }, + { + "epoch": 1.71, + "learning_rate": 3.287990557686633e-05, + "loss": 0.2428, + "step": 17408 + }, + { + "epoch": 1.71, + "eval_loss": 0.42891454696655273, + "eval_runtime": 32.3773, + "eval_samples_per_second": 106.155, + "eval_steps_per_second": 3.336, + "step": 17408 + }, + { + "epoch": 1.81, + "learning_rate": 3.1873709058719384e-05, + "loss": 0.2425, + "step": 18432 + }, + { + "epoch": 1.81, + "eval_loss": 0.4228712022304535, + "eval_runtime": 32.4341, + "eval_samples_per_second": 105.969, + "eval_steps_per_second": 3.33, + "step": 18432 + }, + { + "epoch": 1.91, + "learning_rate": 3.08665289662634e-05, + "loss": 0.2424, + "step": 19456 + }, + { + "epoch": 1.91, + "eval_loss": 0.4291061758995056, + "eval_runtime": 32.3192, + "eval_samples_per_second": 106.345, + "eval_steps_per_second": 3.342, + "step": 19456 + }, + { + "epoch": 2.01, + "learning_rate": 2.985934887380742e-05, + "loss": 0.2422, + "step": 20480 + }, + { + "epoch": 2.01, + "eval_loss": 0.4246675968170166, + "eval_runtime": 32.2862, + "eval_samples_per_second": 106.454, + "eval_steps_per_second": 3.345, + "step": 20480 + }, + { + "epoch": 2.12, + "learning_rate": 2.8853152355660473e-05, + "loss": 0.2397, + "step": 21504 + }, + { + "epoch": 2.12, + "eval_loss": 0.42707231640815735, + "eval_runtime": 32.3373, + "eval_samples_per_second": 106.286, + "eval_steps_per_second": 3.34, + "step": 21504 + }, + { + "epoch": 2.22, + "learning_rate": 2.7846955837513527e-05, + "loss": 0.2397, + "step": 22528 + }, + { + "epoch": 2.22, + "eval_loss": 0.42262786626815796, + "eval_runtime": 32.3328, + "eval_samples_per_second": 106.301, + "eval_steps_per_second": 3.34, + "step": 22528 + }, + { + "epoch": 2.32, + "learning_rate": 2.6839775745057538e-05, + "loss": 0.2411, + "step": 23552 + }, + { + "epoch": 2.32, + "eval_loss": 0.42685696482658386, + "eval_runtime": 32.3962, + "eval_samples_per_second": 106.093, + "eval_steps_per_second": 3.334, + "step": 23552 + }, + { + "epoch": 2.42, + "learning_rate": 2.5832595652601556e-05, + "loss": 0.2408, + "step": 24576 + }, + { + "epoch": 2.42, + "eval_loss": 0.42877742648124695, + "eval_runtime": 32.3163, + "eval_samples_per_second": 106.355, + "eval_steps_per_second": 3.342, + "step": 24576 + }, + { + "epoch": 2.52, + "learning_rate": 2.482541556014557e-05, + "loss": 0.2392, + "step": 25600 + }, + { + "epoch": 2.52, + "eval_loss": 0.42227810621261597, + "eval_runtime": 32.369, + "eval_samples_per_second": 106.182, + "eval_steps_per_second": 3.337, + "step": 25600 + }, + { + "epoch": 2.62, + "learning_rate": 2.3819219041998624e-05, + "loss": 0.2391, + "step": 26624 + }, + { + "epoch": 2.62, + "eval_loss": 0.4296777546405792, + "eval_runtime": 32.4315, + "eval_samples_per_second": 105.977, + "eval_steps_per_second": 3.33, + "step": 26624 + }, + { + "epoch": 2.72, + "learning_rate": 2.2812038949542638e-05, + "loss": 0.2385, + "step": 27648 + }, + { + "epoch": 2.72, + "eval_loss": 0.4252742528915405, + "eval_runtime": 32.4362, + "eval_samples_per_second": 105.962, + "eval_steps_per_second": 3.33, + "step": 27648 + }, + { + "epoch": 2.82, + "learning_rate": 2.180584243139569e-05, + "loss": 0.2371, + "step": 28672 + }, + { + "epoch": 2.82, + "eval_loss": 0.42966845631599426, + "eval_runtime": 32.3834, + "eval_samples_per_second": 106.135, + "eval_steps_per_second": 3.335, + "step": 28672 + }, + { + "epoch": 2.92, + "learning_rate": 2.079866233893971e-05, + "loss": 0.2373, + "step": 29696 + }, + { + "epoch": 2.92, + "eval_loss": 0.4231690466403961, + "eval_runtime": 32.3708, + "eval_samples_per_second": 106.176, + "eval_steps_per_second": 3.336, + "step": 29696 + }, + { + "epoch": 3.02, + "learning_rate": 1.97934493951018e-05, + "loss": 0.2368, + "step": 30720 + }, + { + "epoch": 3.02, + "eval_loss": 0.42956846952438354, + "eval_runtime": 32.3442, + "eval_samples_per_second": 106.263, + "eval_steps_per_second": 3.339, + "step": 30720 + }, + { + "epoch": 3.12, + "learning_rate": 1.8786269302645816e-05, + "loss": 0.2355, + "step": 31744 + }, + { + "epoch": 3.12, + "eval_loss": 0.43274641036987305, + "eval_runtime": 32.3365, + "eval_samples_per_second": 106.289, + "eval_steps_per_second": 3.34, + "step": 31744 + }, + { + "epoch": 3.22, + "learning_rate": 1.777908921018983e-05, + "loss": 0.2354, + "step": 32768 + }, + { + "epoch": 3.22, + "eval_loss": 0.4304845929145813, + "eval_runtime": 32.2799, + "eval_samples_per_second": 106.475, + "eval_steps_per_second": 3.346, + "step": 32768 + }, + { + "epoch": 3.32, + "learning_rate": 1.6771909117733845e-05, + "loss": 0.2345, + "step": 33792 + }, + { + "epoch": 3.32, + "eval_loss": 0.4286292791366577, + "eval_runtime": 32.4389, + "eval_samples_per_second": 105.953, + "eval_steps_per_second": 3.329, + "step": 33792 + }, + { + "epoch": 3.42, + "learning_rate": 1.5765712599586898e-05, + "loss": 0.2355, + "step": 34816 + }, + { + "epoch": 3.42, + "eval_loss": 0.4350430965423584, + "eval_runtime": 32.3371, + "eval_samples_per_second": 106.287, + "eval_steps_per_second": 3.34, + "step": 34816 + }, + { + "epoch": 3.53, + "learning_rate": 1.4758532507130915e-05, + "loss": 0.2353, + "step": 35840 + }, + { + "epoch": 3.53, + "eval_loss": 0.4268806278705597, + "eval_runtime": 32.3956, + "eval_samples_per_second": 106.095, + "eval_steps_per_second": 3.334, + "step": 35840 + }, + { + "epoch": 3.63, + "learning_rate": 1.375233598898397e-05, + "loss": 0.2351, + "step": 36864 + }, + { + "epoch": 3.63, + "eval_loss": 0.43005427718162537, + "eval_runtime": 32.3262, + "eval_samples_per_second": 106.323, + "eval_steps_per_second": 3.341, + "step": 36864 + }, + { + "epoch": 3.73, + "learning_rate": 1.2745155896527982e-05, + "loss": 0.2336, + "step": 37888 + }, + { + "epoch": 3.73, + "eval_loss": 0.4301435649394989, + "eval_runtime": 32.4031, + "eval_samples_per_second": 106.07, + "eval_steps_per_second": 3.333, + "step": 37888 + }, + { + "epoch": 3.83, + "learning_rate": 1.1737975804071997e-05, + "loss": 0.2344, + "step": 38912 + }, + { + "epoch": 3.83, + "eval_loss": 0.43188127875328064, + "eval_runtime": 32.3893, + "eval_samples_per_second": 106.115, + "eval_steps_per_second": 3.334, + "step": 38912 + }, + { + "epoch": 3.93, + "learning_rate": 1.0730795711616013e-05, + "loss": 0.2339, + "step": 39936 + }, + { + "epoch": 3.93, + "eval_loss": 0.4304964244365692, + "eval_runtime": 32.4411, + "eval_samples_per_second": 105.946, + "eval_steps_per_second": 3.329, + "step": 39936 + }, + { + "epoch": 4.03, + "learning_rate": 9.724599193469066e-06, + "loss": 0.2326, + "step": 40960 + }, + { + "epoch": 4.03, + "eval_loss": 0.4298175573348999, + "eval_runtime": 32.3377, + "eval_samples_per_second": 106.285, + "eval_steps_per_second": 3.34, + "step": 40960 + }, + { + "epoch": 4.13, + "learning_rate": 8.718402675322121e-06, + "loss": 0.2316, + "step": 41984 + }, + { + "epoch": 4.13, + "eval_loss": 0.43077352643013, + "eval_runtime": 32.3835, + "eval_samples_per_second": 106.134, + "eval_steps_per_second": 3.335, + "step": 41984 + }, + { + "epoch": 4.23, + "learning_rate": 7.711222582866136e-06, + "loss": 0.2311, + "step": 43008 + }, + { + "epoch": 4.23, + "eval_loss": 0.43301910161972046, + "eval_runtime": 32.3178, + "eval_samples_per_second": 106.35, + "eval_steps_per_second": 3.342, + "step": 43008 + }, + { + "epoch": 4.33, + "learning_rate": 6.704042490410151e-06, + "loss": 0.2315, + "step": 44032 + }, + { + "epoch": 4.33, + "eval_loss": 0.4313049912452698, + "eval_runtime": 32.3292, + "eval_samples_per_second": 106.313, + "eval_steps_per_second": 3.341, + "step": 44032 + }, + { + "epoch": 4.43, + "learning_rate": 5.697845972263205e-06, + "loss": 0.2305, + "step": 45056 + }, + { + "epoch": 4.43, + "eval_loss": 0.43192604184150696, + "eval_runtime": 32.2814, + "eval_samples_per_second": 106.47, + "eval_steps_per_second": 3.346, + "step": 45056 + }, + { + "epoch": 4.53, + "learning_rate": 4.69066587980722e-06, + "loss": 0.2328, + "step": 46080 + }, + { + "epoch": 4.53, + "eval_loss": 0.42917123436927795, + "eval_runtime": 32.2788, + "eval_samples_per_second": 106.479, + "eval_steps_per_second": 3.346, + "step": 46080 + }, + { + "epoch": 4.63, + "learning_rate": 3.6834857873512347e-06, + "loss": 0.232, + "step": 47104 + }, + { + "epoch": 4.63, + "eval_loss": 0.4288509488105774, + "eval_runtime": 32.3263, + "eval_samples_per_second": 106.322, + "eval_steps_per_second": 3.341, + "step": 47104 + }, + { + "epoch": 4.73, + "learning_rate": 2.6763056948952493e-06, + "loss": 0.2309, + "step": 48128 + }, + { + "epoch": 4.73, + "eval_loss": 0.43027371168136597, + "eval_runtime": 32.3873, + "eval_samples_per_second": 106.122, + "eval_steps_per_second": 3.335, + "step": 48128 + }, + { + "epoch": 4.83, + "learning_rate": 1.6701091767483034e-06, + "loss": 0.23, + "step": 49152 + }, + { + "epoch": 4.83, + "eval_loss": 0.4316680133342743, + "eval_runtime": 32.3291, + "eval_samples_per_second": 106.313, + "eval_steps_per_second": 3.341, + "step": 49152 + }, + { + "epoch": 4.94, + "learning_rate": 6.629290842923184e-07, + "loss": 0.2315, + "step": 50176 + }, + { + "epoch": 4.94, + "eval_loss": 0.4303137958049774, + "eval_runtime": 32.4187, + "eval_samples_per_second": 106.019, + "eval_steps_per_second": 3.331, + "step": 50176 + }, + { + "epoch": 5.0, + "step": 50835, + "total_flos": 4.2534856293423514e+17, + "train_loss": 0.24000135361532715, + "train_runtime": 35584.6801, + "train_samples_per_second": 51.426, + "train_steps_per_second": 1.429 + } + ], + "max_steps": 50835, + "num_train_epochs": 5, + "total_flos": 4.2534856293423514e+17, "trial_name": null, "trial_params": null } diff --git a/manual_upload/training_args.bin b/manual_upload/training_args.bin index 3a1307a6fc7a9262833f981703264dc0bd753507..fe0b86ed4a6ab1c86797fadcaa43d46ee74f7857 100644 --- a/manual_upload/training_args.bin +++ b/manual_upload/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0206a7605fed29c8321de91dfcf793fb6150f09dc4e519e1121bb6e0b17b29fc -size 3515 +oid sha256:bdf489f059ab44f9bc26200d314d3fc5954dabfb501b51ecc19cdc4d4be8a527 +size 3579 diff --git a/manual_upload/training_args.json b/manual_upload/training_args.json index f16990343e6c8bf86f1ed5fd8f642297ee873ac0..3f2e1b7806e9664e694aea2d005b14d1fa1a4e63 100644 --- a/manual_upload/training_args.json +++ b/manual_upload/training_args.json @@ -6,34 +6,34 @@ "do_predict": false, "evaluation_strategy": "steps", "prediction_loss_only": false, - "per_device_train_batch_size": 10, + "per_device_train_batch_size": 9, "per_device_eval_batch_size": 8, "per_gpu_train_batch_size": null, "per_gpu_eval_batch_size": null, "gradient_accumulation_steps": 1, "eval_accumulation_steps": null, "eval_delay": 0, - "learning_rate": 0.0005, - "weight_decay": 0.1, + "learning_rate": 5e-05, + "weight_decay": 0.0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_epsilon": 1e-08, "max_grad_norm": 1.0, - "num_train_epochs": 6, + "num_train_epochs": 5, "max_steps": -1, - "lr_scheduler_type": "cosine", + "lr_scheduler_type": "linear", "warmup_ratio": 0.0, - "warmup_steps": 200, + "warmup_steps": 0, "log_level": "passive", "log_level_replica": "passive", "log_on_each_node": true, "logging_dir": "models/elec-gmusic-familized/logs", "logging_strategy": "steps", "logging_first_step": false, - "logging_steps": 4096, + "logging_steps": 1024, "logging_nan_inf_filter": true, "save_strategy": "steps", - "save_steps": 16384, + "save_steps": 4096, "save_total_limit": 5, "save_on_each_node": false, "no_cuda": false, @@ -55,7 +55,7 @@ "tpu_metrics_debug": false, "debug": [], "dataloader_drop_last": false, - "eval_steps": 4096, + "eval_steps": 1024, "dataloader_num_workers": 0, "past_index": -1, "run_name": "models/elec-gmusic-familized",