Amirhossein75 committed on Aug 25

Commit

40d8691

1 Parent(s): 71e4fd0

Initial model upload

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.idea/.gitignore +3 -0
added_tokens.json +4 -0
checkpoint-1000/added_tokens.json +4 -0
checkpoint-1000/config.json +91 -0
checkpoint-1000/generation_config.json +9 -0
checkpoint-1000/model.safetensors +3 -0
checkpoint-1000/optimizer.pt +3 -0
checkpoint-1000/preprocessor_config.json +19 -0
checkpoint-1000/rng_state.pth +3 -0
checkpoint-1000/scheduler.pt +3 -0
checkpoint-1000/special_tokens_map.json +13 -0
checkpoint-1000/spm_char.model +3 -0
checkpoint-1000/tokenizer_config.json +64 -0
checkpoint-1000/trainer_state.json +322 -0
checkpoint-1000/training_args.bin +3 -0
checkpoint-2000/added_tokens.json +4 -0
checkpoint-2000/config.json +91 -0
checkpoint-2000/generation_config.json +9 -0
checkpoint-2000/model.safetensors +3 -0
checkpoint-2000/optimizer.pt +3 -0
checkpoint-2000/preprocessor_config.json +19 -0
checkpoint-2000/rng_state.pth +3 -0
checkpoint-2000/scheduler.pt +3 -0
checkpoint-2000/special_tokens_map.json +13 -0
checkpoint-2000/spm_char.model +3 -0
checkpoint-2000/tokenizer_config.json +64 -0
checkpoint-2000/trainer_state.json +610 -0
checkpoint-2000/training_args.bin +3 -0
checkpoint-3000/added_tokens.json +4 -0
checkpoint-3000/config.json +91 -0
checkpoint-3000/generation_config.json +9 -0
checkpoint-3000/model.safetensors +3 -0
checkpoint-3000/optimizer.pt +3 -0
checkpoint-3000/preprocessor_config.json +19 -0
checkpoint-3000/rng_state.pth +3 -0
checkpoint-3000/scheduler.pt +3 -0
checkpoint-3000/special_tokens_map.json +13 -0
checkpoint-3000/spm_char.model +3 -0
checkpoint-3000/tokenizer_config.json +64 -0
checkpoint-3000/trainer_state.json +898 -0
checkpoint-3000/training_args.bin +3 -0
checkpoint-4000/added_tokens.json +4 -0
checkpoint-4000/config.json +91 -0
checkpoint-4000/generation_config.json +9 -0
checkpoint-4000/model.safetensors +3 -0
checkpoint-4000/optimizer.pt +3 -0
checkpoint-4000/preprocessor_config.json +19 -0
checkpoint-4000/rng_state.pth +3 -0
checkpoint-4000/scheduler.pt +3 -0
checkpoint-4000/special_tokens_map.json +13 -0

.idea/.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+# Default ignored files
+/shelf/
+/workspace.xml

added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "<ctc_blank>": 80,
+  "<mask>": 79
+}

checkpoint-1000/added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "<ctc_blank>": 80,
+  "<mask>": 79
+}

checkpoint-1000/config.json ADDED Viewed

	@@ -0,0 +1,91 @@

+{
+  "activation_dropout": 0.1,
+  "apply_spec_augment": true,
+  "architectures": [
+    "SpeechT5ForTextToSpeech"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 0,
+  "conv_bias": false,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "decoder_attention_heads": 12,
+  "decoder_ffn_dim": 3072,
+  "decoder_layerdrop": 0.1,
+  "decoder_layers": 6,
+  "decoder_start_token_id": 2,
+  "encoder_attention_heads": 12,
+  "encoder_ffn_dim": 3072,
+  "encoder_layerdrop": 0.1,
+  "encoder_layers": 12,
+  "encoder_max_relative_position": 160,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_norm": "group",
+  "feat_proj_dropout": 0.0,
+  "guided_attention_loss_num_heads": 2,
+  "guided_attention_loss_scale": 10.0,
+  "guided_attention_loss_sigma": 0.4,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "is_encoder_decoder": true,
+  "layer_norm_eps": 1e-05,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "max_length": null,
+  "max_speech_positions": 1876,
+  "max_text_positions": 600,
+  "model_type": "speecht5",
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_mel_bins": 80,
+  "pad_token_id": 1,
+  "positional_dropout": 0.1,
+  "reduction_factor": 2,
+  "scale_embedding": false,
+  "speaker_embedding_dim": 512,
+  "speech_decoder_postnet_dropout": 0.5,
+  "speech_decoder_postnet_kernel": 5,
+  "speech_decoder_postnet_layers": 5,
+  "speech_decoder_postnet_units": 256,
+  "speech_decoder_prenet_dropout": 0.5,
+  "speech_decoder_prenet_layers": 2,
+  "speech_decoder_prenet_units": 256,
+  "torch_dtype": "float32",
+  "transformers_version": "4.55.4",
+  "use_cache": false,
+  "use_guided_attention_loss": true,
+  "vocab_size": 81
+}

checkpoint-1000/generation_config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 0,
+  "decoder_start_token_id": 2,
+  "eos_token_id": 2,
+  "max_length": 1876,
+  "pad_token_id": 1,
+  "transformers_version": "4.55.4"
+}

checkpoint-1000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eb5d6fe49ff85411787439f9ad2e6bfa7affebb9cb657848d6ca12433db4e10a
+size 577789320

checkpoint-1000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a801f2d4ec47bf11dfadfa6c068daebd7c9d851603bd0a0eef429e5a22f6bb2e
+size 1155777946

checkpoint-1000/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,19 @@

+{
+  "do_normalize": false,
+  "feature_extractor_type": "SpeechT5FeatureExtractor",
+  "feature_size": 1,
+  "fmax": 7600,
+  "fmin": 80,
+  "frame_signal_scale": 1.0,
+  "hop_length": 16,
+  "mel_floor": 1e-10,
+  "num_mel_bins": 80,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "processor_class": "SpeechT5Processor",
+  "reduction_factor": 2,
+  "return_attention_mask": true,
+  "sampling_rate": 16000,
+  "win_function": "hann_window",
+  "win_length": 64
+}

checkpoint-1000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4f27257904c7decb41a03da01a49d9f6fdf1f1b8f5e5d56fe64ef4572336d6eb
+size 14645

checkpoint-1000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5186565a906d7db433e54fbfdb3d62aa206e2cb82464d6a3316608741a692047
+size 1465

checkpoint-1000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}

checkpoint-1000/spm_char.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7fcc48f3e225f627b1641db410ceb0c8649bd2b0c982e150b03f8be3728ab560
+size 238473

checkpoint-1000/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,64 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "79": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "80": {
+      "content": "<ctc_blank>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "mask_token": "<mask>",
+  "model_max_length": 600,
+  "normalize": false,
+  "pad_token": "<pad>",
+  "processor_class": "SpeechT5Processor",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "SpeechT5Tokenizer",
+  "unk_token": "<unk>"
+}

checkpoint-1000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,322 @@

+{
+  "best_global_step": 1000,
+  "best_metric": 0.9205830097198486,
+  "best_model_checkpoint": "runs/emotts_ravdess\\checkpoint-1000",
+  "epoch": 24.395061728395063,
+  "eval_steps": 1000,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.6172839506172839,
+      "grad_norm": 46.678199768066406,
+      "learning_rate": 4.800000000000001e-07,
+      "loss": 3.4472,
+      "step": 25
+    },
+    {
+      "epoch": 1.2222222222222223,
+      "grad_norm": 26.903335571289062,
+      "learning_rate": 9.800000000000001e-07,
+      "loss": 2.9051,
+      "step": 50
+    },
+    {
+      "epoch": 1.8395061728395061,
+      "grad_norm": 16.712799072265625,
+      "learning_rate": 1.48e-06,
+      "loss": 2.2302,
+      "step": 75
+    },
+    {
+      "epoch": 2.4444444444444446,
+      "grad_norm": 11.607951164245605,
+      "learning_rate": 1.98e-06,
+      "loss": 1.7683,
+      "step": 100
+    },
+    {
+      "epoch": 3.049382716049383,
+      "grad_norm": 7.216983318328857,
+      "learning_rate": 2.4800000000000004e-06,
+      "loss": 1.5434,
+      "step": 125
+    },
+    {
+      "epoch": 3.6666666666666665,
+      "grad_norm": 10.899630546569824,
+      "learning_rate": 2.9800000000000003e-06,
+      "loss": 1.4385,
+      "step": 150
+    },
+    {
+      "epoch": 4.271604938271605,
+      "grad_norm": 6.701765537261963,
+      "learning_rate": 3.48e-06,
+      "loss": 1.3262,
+      "step": 175
+    },
+    {
+      "epoch": 4.888888888888889,
+      "grad_norm": 9.419053077697754,
+      "learning_rate": 3.980000000000001e-06,
+      "loss": 1.285,
+      "step": 200
+    },
+    {
+      "epoch": 5.493827160493828,
+      "grad_norm": 5.913278579711914,
+      "learning_rate": 4.48e-06,
+      "loss": 1.2503,
+      "step": 225
+    },
+    {
+      "epoch": 6.098765432098766,
+      "grad_norm": 8.171669006347656,
+      "learning_rate": 4.980000000000001e-06,
+      "loss": 1.1868,
+      "step": 250
+    },
+    {
+      "epoch": 6.716049382716049,
+      "grad_norm": 5.54558801651001,
+      "learning_rate": 5.480000000000001e-06,
+      "loss": 1.1478,
+      "step": 275
+    },
+    {
+      "epoch": 7.320987654320987,
+      "grad_norm": 5.325434684753418,
+      "learning_rate": 5.98e-06,
+      "loss": 1.1245,
+      "step": 300
+    },
+    {
+      "epoch": 7.938271604938271,
+      "grad_norm": 5.406148433685303,
+      "learning_rate": 6.480000000000001e-06,
+      "loss": 1.1145,
+      "step": 325
+    },
+    {
+      "epoch": 8.54320987654321,
+      "grad_norm": 8.461536407470703,
+      "learning_rate": 6.98e-06,
+      "loss": 1.0641,
+      "step": 350
+    },
+    {
+      "epoch": 9.148148148148149,
+      "grad_norm": 3.8533031940460205,
+      "learning_rate": 7.48e-06,
+      "loss": 1.0573,
+      "step": 375
+    },
+    {
+      "epoch": 9.765432098765432,
+      "grad_norm": 7.569976806640625,
+      "learning_rate": 7.980000000000002e-06,
+      "loss": 1.061,
+      "step": 400
+    },
+    {
+      "epoch": 10.37037037037037,
+      "grad_norm": 10.156228065490723,
+      "learning_rate": 8.48e-06,
+      "loss": 1.0485,
+      "step": 425
+    },
+    {
+      "epoch": 10.987654320987655,
+      "grad_norm": 4.668756484985352,
+      "learning_rate": 8.98e-06,
+      "loss": 1.0216,
+      "step": 450
+    },
+    {
+      "epoch": 11.592592592592592,
+      "grad_norm": 5.087125301361084,
+      "learning_rate": 9.48e-06,
+      "loss": 1.0319,
+      "step": 475
+    },
+    {
+      "epoch": 12.197530864197532,
+      "grad_norm": 7.943349361419678,
+      "learning_rate": 9.980000000000001e-06,
+      "loss": 1.0,
+      "step": 500
+    },
+    {
+      "epoch": 12.814814814814815,
+      "grad_norm": 7.655898571014404,
+      "learning_rate": 9.931428571428571e-06,
+      "loss": 1.0052,
+      "step": 525
+    },
+    {
+      "epoch": 13.419753086419753,
+      "grad_norm": 4.458106994628906,
+      "learning_rate": 9.86e-06,
+      "loss": 1.0001,
+      "step": 550
+    },
+    {
+      "epoch": 14.024691358024691,
+      "grad_norm": 9.058222770690918,
+      "learning_rate": 9.78857142857143e-06,
+      "loss": 1.0015,
+      "step": 575
+    },
+    {
+      "epoch": 14.641975308641975,
+      "grad_norm": 4.795205593109131,
+      "learning_rate": 9.717142857142858e-06,
+      "loss": 0.9836,
+      "step": 600
+    },
+    {
+      "epoch": 15.246913580246913,
+      "grad_norm": 10.566876411437988,
+      "learning_rate": 9.645714285714286e-06,
+      "loss": 1.0019,
+      "step": 625
+    },
+    {
+      "epoch": 15.864197530864198,
+      "grad_norm": 7.610626220703125,
+      "learning_rate": 9.574285714285715e-06,
+      "loss": 0.9779,
+      "step": 650
+    },
+    {
+      "epoch": 16.469135802469136,
+      "grad_norm": 6.008159637451172,
+      "learning_rate": 9.502857142857144e-06,
+      "loss": 0.9798,
+      "step": 675
+    },
+    {
+      "epoch": 17.074074074074073,
+      "grad_norm": 6.685286521911621,
+      "learning_rate": 9.431428571428573e-06,
+      "loss": 0.9753,
+      "step": 700
+    },
+    {
+      "epoch": 17.691358024691358,
+      "grad_norm": 2.7540247440338135,
+      "learning_rate": 9.360000000000002e-06,
+      "loss": 0.967,
+      "step": 725
+    },
+    {
+      "epoch": 18.296296296296298,
+      "grad_norm": 4.825072288513184,
+      "learning_rate": 9.28857142857143e-06,
+      "loss": 0.9575,
+      "step": 750
+    },
+    {
+      "epoch": 18.91358024691358,
+      "grad_norm": 6.618119716644287,
+      "learning_rate": 9.217142857142858e-06,
+      "loss": 0.9675,
+      "step": 775
+    },
+    {
+      "epoch": 19.51851851851852,
+      "grad_norm": 5.465808391571045,
+      "learning_rate": 9.145714285714287e-06,
+      "loss": 0.9626,
+      "step": 800
+    },
+    {
+      "epoch": 20.123456790123456,
+      "grad_norm": 4.9501051902771,
+      "learning_rate": 9.074285714285716e-06,
+      "loss": 0.9638,
+      "step": 825
+    },
+    {
+      "epoch": 20.74074074074074,
+      "grad_norm": 4.926831245422363,
+      "learning_rate": 9.002857142857144e-06,
+      "loss": 0.9582,
+      "step": 850
+    },
+    {
+      "epoch": 21.34567901234568,
+      "grad_norm": 6.605464458465576,
+      "learning_rate": 8.931428571428573e-06,
+      "loss": 0.9551,
+      "step": 875
+    },
+    {
+      "epoch": 21.962962962962962,
+      "grad_norm": 5.774538040161133,
+      "learning_rate": 8.860000000000002e-06,
+      "loss": 0.9596,
+      "step": 900
+    },
+    {
+      "epoch": 22.567901234567902,
+      "grad_norm": 4.304802417755127,
+      "learning_rate": 8.788571428571429e-06,
+      "loss": 0.9489,
+      "step": 925
+    },
+    {
+      "epoch": 23.17283950617284,
+      "grad_norm": 5.171604633331299,
+      "learning_rate": 8.717142857142858e-06,
+      "loss": 0.953,
+      "step": 950
+    },
+    {
+      "epoch": 23.790123456790123,
+      "grad_norm": 7.152281761169434,
+      "learning_rate": 8.645714285714287e-06,
+      "loss": 0.9604,
+      "step": 975
+    },
+    {
+      "epoch": 24.395061728395063,
+      "grad_norm": 4.954558849334717,
+      "learning_rate": 8.574285714285714e-06,
+      "loss": 0.9489,
+      "step": 1000
+    },
+    {
+      "epoch": 24.395061728395063,
+      "eval_loss": 0.9205830097198486,
+      "eval_runtime": 2.2708,
+      "eval_samples_per_second": 63.413,
+      "eval_steps_per_second": 31.707,
+      "step": 1000
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 4000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 98,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 821472814356480.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-1000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:66d367dc04409d63341644c780ebdb997e8756f9aa9f6d110afc5d9ab8de84be
+size 5905

checkpoint-2000/added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "<ctc_blank>": 80,
+  "<mask>": 79
+}

checkpoint-2000/config.json ADDED Viewed

	@@ -0,0 +1,91 @@

+{
+  "activation_dropout": 0.1,
+  "apply_spec_augment": true,
+  "architectures": [
+    "SpeechT5ForTextToSpeech"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 0,
+  "conv_bias": false,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "decoder_attention_heads": 12,
+  "decoder_ffn_dim": 3072,
+  "decoder_layerdrop": 0.1,
+  "decoder_layers": 6,
+  "decoder_start_token_id": 2,
+  "encoder_attention_heads": 12,
+  "encoder_ffn_dim": 3072,
+  "encoder_layerdrop": 0.1,
+  "encoder_layers": 12,
+  "encoder_max_relative_position": 160,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_norm": "group",
+  "feat_proj_dropout": 0.0,
+  "guided_attention_loss_num_heads": 2,
+  "guided_attention_loss_scale": 10.0,
+  "guided_attention_loss_sigma": 0.4,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "is_encoder_decoder": true,
+  "layer_norm_eps": 1e-05,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "max_length": null,
+  "max_speech_positions": 1876,
+  "max_text_positions": 600,
+  "model_type": "speecht5",
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_mel_bins": 80,
+  "pad_token_id": 1,
+  "positional_dropout": 0.1,
+  "reduction_factor": 2,
+  "scale_embedding": false,
+  "speaker_embedding_dim": 512,
+  "speech_decoder_postnet_dropout": 0.5,
+  "speech_decoder_postnet_kernel": 5,
+  "speech_decoder_postnet_layers": 5,
+  "speech_decoder_postnet_units": 256,
+  "speech_decoder_prenet_dropout": 0.5,
+  "speech_decoder_prenet_layers": 2,
+  "speech_decoder_prenet_units": 256,
+  "torch_dtype": "float32",
+  "transformers_version": "4.55.4",
+  "use_cache": false,
+  "use_guided_attention_loss": true,
+  "vocab_size": 81
+}

checkpoint-2000/generation_config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 0,
+  "decoder_start_token_id": 2,
+  "eos_token_id": 2,
+  "max_length": 1876,
+  "pad_token_id": 1,
+  "transformers_version": "4.55.4"
+}

checkpoint-2000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:295a846f5d0ead4e65b737b369b8205cd013a02d08d0220b3caa7e8e4b777b77
+size 577789320

checkpoint-2000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2388ca0f503df54eb4d30573ff0fc9814dd98cc0759ae40bf1b7438f984e1ab6
+size 1155777946

checkpoint-2000/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,19 @@

+{
+  "do_normalize": false,
+  "feature_extractor_type": "SpeechT5FeatureExtractor",
+  "feature_size": 1,
+  "fmax": 7600,
+  "fmin": 80,
+  "frame_signal_scale": 1.0,
+  "hop_length": 16,
+  "mel_floor": 1e-10,
+  "num_mel_bins": 80,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "processor_class": "SpeechT5Processor",
+  "reduction_factor": 2,
+  "return_attention_mask": true,
+  "sampling_rate": 16000,
+  "win_function": "hann_window",
+  "win_length": 64
+}

checkpoint-2000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:396a8cc8a565882c2cc697e78085381bcb24a262358918ccaa5445eb5232e231
+size 14645

checkpoint-2000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1e92941487269a9e704ed42d0796c2eb3245e8d6d83c68a723be04187c99b397
+size 1465

checkpoint-2000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}

checkpoint-2000/spm_char.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7fcc48f3e225f627b1641db410ceb0c8649bd2b0c982e150b03f8be3728ab560
+size 238473

checkpoint-2000/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,64 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "79": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "80": {
+      "content": "<ctc_blank>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "mask_token": "<mask>",
+  "model_max_length": 600,
+  "normalize": false,
+  "pad_token": "<pad>",
+  "processor_class": "SpeechT5Processor",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "SpeechT5Tokenizer",
+  "unk_token": "<unk>"
+}

checkpoint-2000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,610 @@

+{
+  "best_global_step": 2000,
+  "best_metric": 0.8953001499176025,
+  "best_model_checkpoint": "runs/emotts_ravdess\\checkpoint-2000",
+  "epoch": 48.79012345679013,
+  "eval_steps": 1000,
+  "global_step": 2000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.6172839506172839,
+      "grad_norm": 46.678199768066406,
+      "learning_rate": 4.800000000000001e-07,
+      "loss": 3.4472,
+      "step": 25
+    },
+    {
+      "epoch": 1.2222222222222223,
+      "grad_norm": 26.903335571289062,
+      "learning_rate": 9.800000000000001e-07,
+      "loss": 2.9051,
+      "step": 50
+    },
+    {
+      "epoch": 1.8395061728395061,
+      "grad_norm": 16.712799072265625,
+      "learning_rate": 1.48e-06,
+      "loss": 2.2302,
+      "step": 75
+    },
+    {
+      "epoch": 2.4444444444444446,
+      "grad_norm": 11.607951164245605,
+      "learning_rate": 1.98e-06,
+      "loss": 1.7683,
+      "step": 100
+    },
+    {
+      "epoch": 3.049382716049383,
+      "grad_norm": 7.216983318328857,
+      "learning_rate": 2.4800000000000004e-06,
+      "loss": 1.5434,
+      "step": 125
+    },
+    {
+      "epoch": 3.6666666666666665,
+      "grad_norm": 10.899630546569824,
+      "learning_rate": 2.9800000000000003e-06,
+      "loss": 1.4385,
+      "step": 150
+    },
+    {
+      "epoch": 4.271604938271605,
+      "grad_norm": 6.701765537261963,
+      "learning_rate": 3.48e-06,
+      "loss": 1.3262,
+      "step": 175
+    },
+    {
+      "epoch": 4.888888888888889,
+      "grad_norm": 9.419053077697754,
+      "learning_rate": 3.980000000000001e-06,
+      "loss": 1.285,
+      "step": 200
+    },
+    {
+      "epoch": 5.493827160493828,
+      "grad_norm": 5.913278579711914,
+      "learning_rate": 4.48e-06,
+      "loss": 1.2503,
+      "step": 225
+    },
+    {
+      "epoch": 6.098765432098766,
+      "grad_norm": 8.171669006347656,
+      "learning_rate": 4.980000000000001e-06,
+      "loss": 1.1868,
+      "step": 250
+    },
+    {
+      "epoch": 6.716049382716049,
+      "grad_norm": 5.54558801651001,
+      "learning_rate": 5.480000000000001e-06,
+      "loss": 1.1478,
+      "step": 275
+    },
+    {
+      "epoch": 7.320987654320987,
+      "grad_norm": 5.325434684753418,
+      "learning_rate": 5.98e-06,
+      "loss": 1.1245,
+      "step": 300
+    },
+    {
+      "epoch": 7.938271604938271,
+      "grad_norm": 5.406148433685303,
+      "learning_rate": 6.480000000000001e-06,
+      "loss": 1.1145,
+      "step": 325
+    },
+    {
+      "epoch": 8.54320987654321,
+      "grad_norm": 8.461536407470703,
+      "learning_rate": 6.98e-06,
+      "loss": 1.0641,
+      "step": 350
+    },
+    {
+      "epoch": 9.148148148148149,
+      "grad_norm": 3.8533031940460205,
+      "learning_rate": 7.48e-06,
+      "loss": 1.0573,
+      "step": 375
+    },
+    {
+      "epoch": 9.765432098765432,
+      "grad_norm": 7.569976806640625,
+      "learning_rate": 7.980000000000002e-06,
+      "loss": 1.061,
+      "step": 400
+    },
+    {
+      "epoch": 10.37037037037037,
+      "grad_norm": 10.156228065490723,
+      "learning_rate": 8.48e-06,
+      "loss": 1.0485,
+      "step": 425
+    },
+    {
+      "epoch": 10.987654320987655,
+      "grad_norm": 4.668756484985352,
+      "learning_rate": 8.98e-06,
+      "loss": 1.0216,
+      "step": 450
+    },
+    {
+      "epoch": 11.592592592592592,
+      "grad_norm": 5.087125301361084,
+      "learning_rate": 9.48e-06,
+      "loss": 1.0319,
+      "step": 475
+    },
+    {
+      "epoch": 12.197530864197532,
+      "grad_norm": 7.943349361419678,
+      "learning_rate": 9.980000000000001e-06,
+      "loss": 1.0,
+      "step": 500
+    },
+    {
+      "epoch": 12.814814814814815,
+      "grad_norm": 7.655898571014404,
+      "learning_rate": 9.931428571428571e-06,
+      "loss": 1.0052,
+      "step": 525
+    },
+    {
+      "epoch": 13.419753086419753,
+      "grad_norm": 4.458106994628906,
+      "learning_rate": 9.86e-06,
+      "loss": 1.0001,
+      "step": 550
+    },
+    {
+      "epoch": 14.024691358024691,
+      "grad_norm": 9.058222770690918,
+      "learning_rate": 9.78857142857143e-06,
+      "loss": 1.0015,
+      "step": 575
+    },
+    {
+      "epoch": 14.641975308641975,
+      "grad_norm": 4.795205593109131,
+      "learning_rate": 9.717142857142858e-06,
+      "loss": 0.9836,
+      "step": 600
+    },
+    {
+      "epoch": 15.246913580246913,
+      "grad_norm": 10.566876411437988,
+      "learning_rate": 9.645714285714286e-06,
+      "loss": 1.0019,
+      "step": 625
+    },
+    {
+      "epoch": 15.864197530864198,
+      "grad_norm": 7.610626220703125,
+      "learning_rate": 9.574285714285715e-06,
+      "loss": 0.9779,
+      "step": 650
+    },
+    {
+      "epoch": 16.469135802469136,
+      "grad_norm": 6.008159637451172,
+      "learning_rate": 9.502857142857144e-06,
+      "loss": 0.9798,
+      "step": 675
+    },
+    {
+      "epoch": 17.074074074074073,
+      "grad_norm": 6.685286521911621,
+      "learning_rate": 9.431428571428573e-06,
+      "loss": 0.9753,
+      "step": 700
+    },
+    {
+      "epoch": 17.691358024691358,
+      "grad_norm": 2.7540247440338135,
+      "learning_rate": 9.360000000000002e-06,
+      "loss": 0.967,
+      "step": 725
+    },
+    {
+      "epoch": 18.296296296296298,
+      "grad_norm": 4.825072288513184,
+      "learning_rate": 9.28857142857143e-06,
+      "loss": 0.9575,
+      "step": 750
+    },
+    {
+      "epoch": 18.91358024691358,
+      "grad_norm": 6.618119716644287,
+      "learning_rate": 9.217142857142858e-06,
+      "loss": 0.9675,
+      "step": 775
+    },
+    {
+      "epoch": 19.51851851851852,
+      "grad_norm": 5.465808391571045,
+      "learning_rate": 9.145714285714287e-06,
+      "loss": 0.9626,
+      "step": 800
+    },
+    {
+      "epoch": 20.123456790123456,
+      "grad_norm": 4.9501051902771,
+      "learning_rate": 9.074285714285716e-06,
+      "loss": 0.9638,
+      "step": 825
+    },
+    {
+      "epoch": 20.74074074074074,
+      "grad_norm": 4.926831245422363,
+      "learning_rate": 9.002857142857144e-06,
+      "loss": 0.9582,
+      "step": 850
+    },
+    {
+      "epoch": 21.34567901234568,
+      "grad_norm": 6.605464458465576,
+      "learning_rate": 8.931428571428573e-06,
+      "loss": 0.9551,
+      "step": 875
+    },
+    {
+      "epoch": 21.962962962962962,
+      "grad_norm": 5.774538040161133,
+      "learning_rate": 8.860000000000002e-06,
+      "loss": 0.9596,
+      "step": 900
+    },
+    {
+      "epoch": 22.567901234567902,
+      "grad_norm": 4.304802417755127,
+      "learning_rate": 8.788571428571429e-06,
+      "loss": 0.9489,
+      "step": 925
+    },
+    {
+      "epoch": 23.17283950617284,
+      "grad_norm": 5.171604633331299,
+      "learning_rate": 8.717142857142858e-06,
+      "loss": 0.953,
+      "step": 950
+    },
+    {
+      "epoch": 23.790123456790123,
+      "grad_norm": 7.152281761169434,
+      "learning_rate": 8.645714285714287e-06,
+      "loss": 0.9604,
+      "step": 975
+    },
+    {
+      "epoch": 24.395061728395063,
+      "grad_norm": 4.954558849334717,
+      "learning_rate": 8.574285714285714e-06,
+      "loss": 0.9489,
+      "step": 1000
+    },
+    {
+      "epoch": 24.395061728395063,
+      "eval_loss": 0.9205830097198486,
+      "eval_runtime": 2.2708,
+      "eval_samples_per_second": 63.413,
+      "eval_steps_per_second": 31.707,
+      "step": 1000
+    },
+    {
+      "epoch": 25.0,
+      "grad_norm": 10.266937255859375,
+      "learning_rate": 8.502857142857143e-06,
+      "loss": 0.9541,
+      "step": 1025
+    },
+    {
+      "epoch": 25.617283950617285,
+      "grad_norm": 3.225881814956665,
+      "learning_rate": 8.431428571428572e-06,
+      "loss": 0.9451,
+      "step": 1050
+    },
+    {
+      "epoch": 26.22222222222222,
+      "grad_norm": 4.001440048217773,
+      "learning_rate": 8.36e-06,
+      "loss": 0.9422,
+      "step": 1075
+    },
+    {
+      "epoch": 26.839506172839506,
+      "grad_norm": 5.347984313964844,
+      "learning_rate": 8.288571428571429e-06,
+      "loss": 0.9434,
+      "step": 1100
+    },
+    {
+      "epoch": 27.444444444444443,
+      "grad_norm": 4.1566901206970215,
+      "learning_rate": 8.217142857142858e-06,
+      "loss": 0.942,
+      "step": 1125
+    },
+    {
+      "epoch": 28.049382716049383,
+      "grad_norm": 3.2101686000823975,
+      "learning_rate": 8.145714285714287e-06,
+      "loss": 0.9365,
+      "step": 1150
+    },
+    {
+      "epoch": 28.666666666666668,
+      "grad_norm": 5.183631896972656,
+      "learning_rate": 8.074285714285714e-06,
+      "loss": 0.941,
+      "step": 1175
+    },
+    {
+      "epoch": 29.271604938271604,
+      "grad_norm": 4.704529285430908,
+      "learning_rate": 8.002857142857143e-06,
+      "loss": 0.9374,
+      "step": 1200
+    },
+    {
+      "epoch": 29.88888888888889,
+      "grad_norm": 4.460058689117432,
+      "learning_rate": 7.931428571428572e-06,
+      "loss": 0.9383,
+      "step": 1225
+    },
+    {
+      "epoch": 30.493827160493826,
+      "grad_norm": 3.616530418395996,
+      "learning_rate": 7.860000000000001e-06,
+      "loss": 0.9321,
+      "step": 1250
+    },
+    {
+      "epoch": 31.098765432098766,
+      "grad_norm": 3.92207932472229,
+      "learning_rate": 7.788571428571428e-06,
+      "loss": 0.9347,
+      "step": 1275
+    },
+    {
+      "epoch": 31.71604938271605,
+      "grad_norm": 3.6962461471557617,
+      "learning_rate": 7.717142857142857e-06,
+      "loss": 0.9305,
+      "step": 1300
+    },
+    {
+      "epoch": 32.32098765432099,
+      "grad_norm": 4.276056289672852,
+      "learning_rate": 7.645714285714286e-06,
+      "loss": 0.9336,
+      "step": 1325
+    },
+    {
+      "epoch": 32.93827160493827,
+      "grad_norm": 5.176277160644531,
+      "learning_rate": 7.574285714285715e-06,
+      "loss": 0.9351,
+      "step": 1350
+    },
+    {
+      "epoch": 33.54320987654321,
+      "grad_norm": 7.2538347244262695,
+      "learning_rate": 7.502857142857144e-06,
+      "loss": 0.9241,
+      "step": 1375
+    },
+    {
+      "epoch": 34.148148148148145,
+      "grad_norm": 4.3576273918151855,
+      "learning_rate": 7.431428571428572e-06,
+      "loss": 0.9316,
+      "step": 1400
+    },
+    {
+      "epoch": 34.76543209876543,
+      "grad_norm": 9.138855934143066,
+      "learning_rate": 7.360000000000001e-06,
+      "loss": 0.9277,
+      "step": 1425
+    },
+    {
+      "epoch": 35.370370370370374,
+      "grad_norm": 4.475003719329834,
+      "learning_rate": 7.28857142857143e-06,
+      "loss": 0.9245,
+      "step": 1450
+    },
+    {
+      "epoch": 35.98765432098765,
+      "grad_norm": 7.28753137588501,
+      "learning_rate": 7.217142857142858e-06,
+      "loss": 0.9266,
+      "step": 1475
+    },
+    {
+      "epoch": 36.592592592592595,
+      "grad_norm": 5.1342949867248535,
+      "learning_rate": 7.145714285714286e-06,
+      "loss": 0.9297,
+      "step": 1500
+    },
+    {
+      "epoch": 37.19753086419753,
+      "grad_norm": 2.7765142917633057,
+      "learning_rate": 7.074285714285715e-06,
+      "loss": 0.9253,
+      "step": 1525
+    },
+    {
+      "epoch": 37.81481481481482,
+      "grad_norm": 3.8011326789855957,
+      "learning_rate": 7.002857142857143e-06,
+      "loss": 0.9203,
+      "step": 1550
+    },
+    {
+      "epoch": 38.41975308641975,
+      "grad_norm": 7.432782173156738,
+      "learning_rate": 6.931428571428572e-06,
+      "loss": 0.9196,
+      "step": 1575
+    },
+    {
+      "epoch": 39.02469135802469,
+      "grad_norm": 4.179474830627441,
+      "learning_rate": 6.860000000000001e-06,
+      "loss": 0.9188,
+      "step": 1600
+    },
+    {
+      "epoch": 39.641975308641975,
+      "grad_norm": 8.513073921203613,
+      "learning_rate": 6.7885714285714286e-06,
+      "loss": 0.9268,
+      "step": 1625
+    },
+    {
+      "epoch": 40.24691358024691,
+      "grad_norm": 3.699882984161377,
+      "learning_rate": 6.7171428571428576e-06,
+      "loss": 0.9216,
+      "step": 1650
+    },
+    {
+      "epoch": 40.864197530864196,
+      "grad_norm": 3.949507713317871,
+      "learning_rate": 6.645714285714287e-06,
+      "loss": 0.9238,
+      "step": 1675
+    },
+    {
+      "epoch": 41.46913580246913,
+      "grad_norm": 3.7951810359954834,
+      "learning_rate": 6.574285714285716e-06,
+      "loss": 0.9198,
+      "step": 1700
+    },
+    {
+      "epoch": 42.074074074074076,
+      "grad_norm": 5.373620986938477,
+      "learning_rate": 6.502857142857143e-06,
+      "loss": 0.9135,
+      "step": 1725
+    },
+    {
+      "epoch": 42.69135802469136,
+      "grad_norm": 6.875067234039307,
+      "learning_rate": 6.431428571428572e-06,
+      "loss": 0.918,
+      "step": 1750
+    },
+    {
+      "epoch": 43.2962962962963,
+      "grad_norm": 7.167726039886475,
+      "learning_rate": 6.360000000000001e-06,
+      "loss": 0.9276,
+      "step": 1775
+    },
+    {
+      "epoch": 43.91358024691358,
+      "grad_norm": 3.7067105770111084,
+      "learning_rate": 6.288571428571429e-06,
+      "loss": 0.9169,
+      "step": 1800
+    },
+    {
+      "epoch": 44.51851851851852,
+      "grad_norm": 4.474793434143066,
+      "learning_rate": 6.217142857142857e-06,
+      "loss": 0.9191,
+      "step": 1825
+    },
+    {
+      "epoch": 45.123456790123456,
+      "grad_norm": 5.386421203613281,
+      "learning_rate": 6.145714285714286e-06,
+      "loss": 0.9145,
+      "step": 1850
+    },
+    {
+      "epoch": 45.74074074074074,
+      "grad_norm": 3.068861246109009,
+      "learning_rate": 6.0742857142857145e-06,
+      "loss": 0.9095,
+      "step": 1875
+    },
+    {
+      "epoch": 46.34567901234568,
+      "grad_norm": 3.804973840713501,
+      "learning_rate": 6.0028571428571435e-06,
+      "loss": 0.912,
+      "step": 1900
+    },
+    {
+      "epoch": 46.96296296296296,
+      "grad_norm": 2.9225473403930664,
+      "learning_rate": 5.9314285714285725e-06,
+      "loss": 0.9049,
+      "step": 1925
+    },
+    {
+      "epoch": 47.5679012345679,
+      "grad_norm": 4.022708892822266,
+      "learning_rate": 5.86e-06,
+      "loss": 0.9049,
+      "step": 1950
+    },
+    {
+      "epoch": 48.17283950617284,
+      "grad_norm": 3.421691417694092,
+      "learning_rate": 5.788571428571429e-06,
+      "loss": 0.9101,
+      "step": 1975
+    },
+    {
+      "epoch": 48.79012345679013,
+      "grad_norm": 6.732350826263428,
+      "learning_rate": 5.717142857142858e-06,
+      "loss": 0.9105,
+      "step": 2000
+    },
+    {
+      "epoch": 48.79012345679013,
+      "eval_loss": 0.8953001499176025,
+      "eval_runtime": 2.1587,
+      "eval_samples_per_second": 66.707,
+      "eval_steps_per_second": 33.353,
+      "step": 2000
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 4000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 98,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1642945628712960.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-2000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:66d367dc04409d63341644c780ebdb997e8756f9aa9f6d110afc5d9ab8de84be
+size 5905

checkpoint-3000/added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "<ctc_blank>": 80,
+  "<mask>": 79
+}

checkpoint-3000/config.json ADDED Viewed

	@@ -0,0 +1,91 @@

+{
+  "activation_dropout": 0.1,
+  "apply_spec_augment": true,
+  "architectures": [
+    "SpeechT5ForTextToSpeech"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 0,
+  "conv_bias": false,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "decoder_attention_heads": 12,
+  "decoder_ffn_dim": 3072,
+  "decoder_layerdrop": 0.1,
+  "decoder_layers": 6,
+  "decoder_start_token_id": 2,
+  "encoder_attention_heads": 12,
+  "encoder_ffn_dim": 3072,
+  "encoder_layerdrop": 0.1,
+  "encoder_layers": 12,
+  "encoder_max_relative_position": 160,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_norm": "group",
+  "feat_proj_dropout": 0.0,
+  "guided_attention_loss_num_heads": 2,
+  "guided_attention_loss_scale": 10.0,
+  "guided_attention_loss_sigma": 0.4,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "is_encoder_decoder": true,
+  "layer_norm_eps": 1e-05,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "max_length": null,
+  "max_speech_positions": 1876,
+  "max_text_positions": 600,
+  "model_type": "speecht5",
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_mel_bins": 80,
+  "pad_token_id": 1,
+  "positional_dropout": 0.1,
+  "reduction_factor": 2,
+  "scale_embedding": false,
+  "speaker_embedding_dim": 512,
+  "speech_decoder_postnet_dropout": 0.5,
+  "speech_decoder_postnet_kernel": 5,
+  "speech_decoder_postnet_layers": 5,
+  "speech_decoder_postnet_units": 256,
+  "speech_decoder_prenet_dropout": 0.5,
+  "speech_decoder_prenet_layers": 2,
+  "speech_decoder_prenet_units": 256,
+  "torch_dtype": "float32",
+  "transformers_version": "4.55.4",
+  "use_cache": false,
+  "use_guided_attention_loss": true,
+  "vocab_size": 81
+}

checkpoint-3000/generation_config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 0,
+  "decoder_start_token_id": 2,
+  "eos_token_id": 2,
+  "max_length": 1876,
+  "pad_token_id": 1,
+  "transformers_version": "4.55.4"
+}

checkpoint-3000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2f6eca9575648e4c7d7eb1ea916fee7b23eafefa0db8bf09a04bd46beac454f2
+size 577789320

checkpoint-3000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f8c7d0e8b916fd9a744e0e04850570d8a6297e6bac0767ebd63b53e0cefe4057
+size 1155777946

checkpoint-3000/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,19 @@

+{
+  "do_normalize": false,
+  "feature_extractor_type": "SpeechT5FeatureExtractor",
+  "feature_size": 1,
+  "fmax": 7600,
+  "fmin": 80,
+  "frame_signal_scale": 1.0,
+  "hop_length": 16,
+  "mel_floor": 1e-10,
+  "num_mel_bins": 80,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "processor_class": "SpeechT5Processor",
+  "reduction_factor": 2,
+  "return_attention_mask": true,
+  "sampling_rate": 16000,
+  "win_function": "hann_window",
+  "win_length": 64
+}

checkpoint-3000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4bd7be6ad18d8737c21def51bc146679a3086895043a68047db9ee35a01b64e8
+size 14645

checkpoint-3000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:32cb3a0b1d61782860d37955716f6b5e952b190320ed6c3b93171c974f9325c9
+size 1465

checkpoint-3000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}

checkpoint-3000/spm_char.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7fcc48f3e225f627b1641db410ceb0c8649bd2b0c982e150b03f8be3728ab560
+size 238473

checkpoint-3000/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,64 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "79": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "80": {
+      "content": "<ctc_blank>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "mask_token": "<mask>",
+  "model_max_length": 600,
+  "normalize": false,
+  "pad_token": "<pad>",
+  "processor_class": "SpeechT5Processor",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "SpeechT5Tokenizer",
+  "unk_token": "<unk>"
+}

checkpoint-3000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,898 @@

+{
+  "best_global_step": 3000,
+  "best_metric": 0.8869494795799255,
+  "best_model_checkpoint": "runs/emotts_ravdess\\checkpoint-3000",
+  "epoch": 73.17283950617283,
+  "eval_steps": 1000,
+  "global_step": 3000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.6172839506172839,
+      "grad_norm": 46.678199768066406,
+      "learning_rate": 4.800000000000001e-07,
+      "loss": 3.4472,
+      "step": 25
+    },
+    {
+      "epoch": 1.2222222222222223,
+      "grad_norm": 26.903335571289062,
+      "learning_rate": 9.800000000000001e-07,
+      "loss": 2.9051,
+      "step": 50
+    },
+    {
+      "epoch": 1.8395061728395061,
+      "grad_norm": 16.712799072265625,
+      "learning_rate": 1.48e-06,
+      "loss": 2.2302,
+      "step": 75
+    },
+    {
+      "epoch": 2.4444444444444446,
+      "grad_norm": 11.607951164245605,
+      "learning_rate": 1.98e-06,
+      "loss": 1.7683,
+      "step": 100
+    },
+    {
+      "epoch": 3.049382716049383,
+      "grad_norm": 7.216983318328857,
+      "learning_rate": 2.4800000000000004e-06,
+      "loss": 1.5434,
+      "step": 125
+    },
+    {
+      "epoch": 3.6666666666666665,
+      "grad_norm": 10.899630546569824,
+      "learning_rate": 2.9800000000000003e-06,
+      "loss": 1.4385,
+      "step": 150
+    },
+    {
+      "epoch": 4.271604938271605,
+      "grad_norm": 6.701765537261963,
+      "learning_rate": 3.48e-06,
+      "loss": 1.3262,
+      "step": 175
+    },
+    {
+      "epoch": 4.888888888888889,
+      "grad_norm": 9.419053077697754,
+      "learning_rate": 3.980000000000001e-06,
+      "loss": 1.285,
+      "step": 200
+    },
+    {
+      "epoch": 5.493827160493828,
+      "grad_norm": 5.913278579711914,
+      "learning_rate": 4.48e-06,
+      "loss": 1.2503,
+      "step": 225
+    },
+    {
+      "epoch": 6.098765432098766,
+      "grad_norm": 8.171669006347656,
+      "learning_rate": 4.980000000000001e-06,
+      "loss": 1.1868,
+      "step": 250
+    },
+    {
+      "epoch": 6.716049382716049,
+      "grad_norm": 5.54558801651001,
+      "learning_rate": 5.480000000000001e-06,
+      "loss": 1.1478,
+      "step": 275
+    },
+    {
+      "epoch": 7.320987654320987,
+      "grad_norm": 5.325434684753418,
+      "learning_rate": 5.98e-06,
+      "loss": 1.1245,
+      "step": 300
+    },
+    {
+      "epoch": 7.938271604938271,
+      "grad_norm": 5.406148433685303,
+      "learning_rate": 6.480000000000001e-06,
+      "loss": 1.1145,
+      "step": 325
+    },
+    {
+      "epoch": 8.54320987654321,
+      "grad_norm": 8.461536407470703,
+      "learning_rate": 6.98e-06,
+      "loss": 1.0641,
+      "step": 350
+    },
+    {
+      "epoch": 9.148148148148149,
+      "grad_norm": 3.8533031940460205,
+      "learning_rate": 7.48e-06,
+      "loss": 1.0573,
+      "step": 375
+    },
+    {
+      "epoch": 9.765432098765432,
+      "grad_norm": 7.569976806640625,
+      "learning_rate": 7.980000000000002e-06,
+      "loss": 1.061,
+      "step": 400
+    },
+    {
+      "epoch": 10.37037037037037,
+      "grad_norm": 10.156228065490723,
+      "learning_rate": 8.48e-06,
+      "loss": 1.0485,
+      "step": 425
+    },
+    {
+      "epoch": 10.987654320987655,
+      "grad_norm": 4.668756484985352,
+      "learning_rate": 8.98e-06,
+      "loss": 1.0216,
+      "step": 450
+    },
+    {
+      "epoch": 11.592592592592592,
+      "grad_norm": 5.087125301361084,
+      "learning_rate": 9.48e-06,
+      "loss": 1.0319,
+      "step": 475
+    },
+    {
+      "epoch": 12.197530864197532,
+      "grad_norm": 7.943349361419678,
+      "learning_rate": 9.980000000000001e-06,
+      "loss": 1.0,
+      "step": 500
+    },
+    {
+      "epoch": 12.814814814814815,
+      "grad_norm": 7.655898571014404,
+      "learning_rate": 9.931428571428571e-06,
+      "loss": 1.0052,
+      "step": 525
+    },
+    {
+      "epoch": 13.419753086419753,
+      "grad_norm": 4.458106994628906,
+      "learning_rate": 9.86e-06,
+      "loss": 1.0001,
+      "step": 550
+    },
+    {
+      "epoch": 14.024691358024691,
+      "grad_norm": 9.058222770690918,
+      "learning_rate": 9.78857142857143e-06,
+      "loss": 1.0015,
+      "step": 575
+    },
+    {
+      "epoch": 14.641975308641975,
+      "grad_norm": 4.795205593109131,
+      "learning_rate": 9.717142857142858e-06,
+      "loss": 0.9836,
+      "step": 600
+    },
+    {
+      "epoch": 15.246913580246913,
+      "grad_norm": 10.566876411437988,
+      "learning_rate": 9.645714285714286e-06,
+      "loss": 1.0019,
+      "step": 625
+    },
+    {
+      "epoch": 15.864197530864198,
+      "grad_norm": 7.610626220703125,
+      "learning_rate": 9.574285714285715e-06,
+      "loss": 0.9779,
+      "step": 650
+    },
+    {
+      "epoch": 16.469135802469136,
+      "grad_norm": 6.008159637451172,
+      "learning_rate": 9.502857142857144e-06,
+      "loss": 0.9798,
+      "step": 675
+    },
+    {
+      "epoch": 17.074074074074073,
+      "grad_norm": 6.685286521911621,
+      "learning_rate": 9.431428571428573e-06,
+      "loss": 0.9753,
+      "step": 700
+    },
+    {
+      "epoch": 17.691358024691358,
+      "grad_norm": 2.7540247440338135,
+      "learning_rate": 9.360000000000002e-06,
+      "loss": 0.967,
+      "step": 725
+    },
+    {
+      "epoch": 18.296296296296298,
+      "grad_norm": 4.825072288513184,
+      "learning_rate": 9.28857142857143e-06,
+      "loss": 0.9575,
+      "step": 750
+    },
+    {
+      "epoch": 18.91358024691358,
+      "grad_norm": 6.618119716644287,
+      "learning_rate": 9.217142857142858e-06,
+      "loss": 0.9675,
+      "step": 775
+    },
+    {
+      "epoch": 19.51851851851852,
+      "grad_norm": 5.465808391571045,
+      "learning_rate": 9.145714285714287e-06,
+      "loss": 0.9626,
+      "step": 800
+    },
+    {
+      "epoch": 20.123456790123456,
+      "grad_norm": 4.9501051902771,
+      "learning_rate": 9.074285714285716e-06,
+      "loss": 0.9638,
+      "step": 825
+    },
+    {
+      "epoch": 20.74074074074074,
+      "grad_norm": 4.926831245422363,
+      "learning_rate": 9.002857142857144e-06,
+      "loss": 0.9582,
+      "step": 850
+    },
+    {
+      "epoch": 21.34567901234568,
+      "grad_norm": 6.605464458465576,
+      "learning_rate": 8.931428571428573e-06,
+      "loss": 0.9551,
+      "step": 875
+    },
+    {
+      "epoch": 21.962962962962962,
+      "grad_norm": 5.774538040161133,
+      "learning_rate": 8.860000000000002e-06,
+      "loss": 0.9596,
+      "step": 900
+    },
+    {
+      "epoch": 22.567901234567902,
+      "grad_norm": 4.304802417755127,
+      "learning_rate": 8.788571428571429e-06,
+      "loss": 0.9489,
+      "step": 925
+    },
+    {
+      "epoch": 23.17283950617284,
+      "grad_norm": 5.171604633331299,
+      "learning_rate": 8.717142857142858e-06,
+      "loss": 0.953,
+      "step": 950
+    },
+    {
+      "epoch": 23.790123456790123,
+      "grad_norm": 7.152281761169434,
+      "learning_rate": 8.645714285714287e-06,
+      "loss": 0.9604,
+      "step": 975
+    },
+    {
+      "epoch": 24.395061728395063,
+      "grad_norm": 4.954558849334717,
+      "learning_rate": 8.574285714285714e-06,
+      "loss": 0.9489,
+      "step": 1000
+    },
+    {
+      "epoch": 24.395061728395063,
+      "eval_loss": 0.9205830097198486,
+      "eval_runtime": 2.2708,
+      "eval_samples_per_second": 63.413,
+      "eval_steps_per_second": 31.707,
+      "step": 1000
+    },
+    {
+      "epoch": 25.0,
+      "grad_norm": 10.266937255859375,
+      "learning_rate": 8.502857142857143e-06,
+      "loss": 0.9541,
+      "step": 1025
+    },
+    {
+      "epoch": 25.617283950617285,
+      "grad_norm": 3.225881814956665,
+      "learning_rate": 8.431428571428572e-06,
+      "loss": 0.9451,
+      "step": 1050
+    },
+    {
+      "epoch": 26.22222222222222,
+      "grad_norm": 4.001440048217773,
+      "learning_rate": 8.36e-06,
+      "loss": 0.9422,
+      "step": 1075
+    },
+    {
+      "epoch": 26.839506172839506,
+      "grad_norm": 5.347984313964844,
+      "learning_rate": 8.288571428571429e-06,
+      "loss": 0.9434,
+      "step": 1100
+    },
+    {
+      "epoch": 27.444444444444443,
+      "grad_norm": 4.1566901206970215,
+      "learning_rate": 8.217142857142858e-06,
+      "loss": 0.942,
+      "step": 1125
+    },
+    {
+      "epoch": 28.049382716049383,
+      "grad_norm": 3.2101686000823975,
+      "learning_rate": 8.145714285714287e-06,
+      "loss": 0.9365,
+      "step": 1150
+    },
+    {
+      "epoch": 28.666666666666668,
+      "grad_norm": 5.183631896972656,
+      "learning_rate": 8.074285714285714e-06,
+      "loss": 0.941,
+      "step": 1175
+    },
+    {
+      "epoch": 29.271604938271604,
+      "grad_norm": 4.704529285430908,
+      "learning_rate": 8.002857142857143e-06,
+      "loss": 0.9374,
+      "step": 1200
+    },
+    {
+      "epoch": 29.88888888888889,
+      "grad_norm": 4.460058689117432,
+      "learning_rate": 7.931428571428572e-06,
+      "loss": 0.9383,
+      "step": 1225
+    },
+    {
+      "epoch": 30.493827160493826,
+      "grad_norm": 3.616530418395996,
+      "learning_rate": 7.860000000000001e-06,
+      "loss": 0.9321,
+      "step": 1250
+    },
+    {
+      "epoch": 31.098765432098766,
+      "grad_norm": 3.92207932472229,
+      "learning_rate": 7.788571428571428e-06,
+      "loss": 0.9347,
+      "step": 1275
+    },
+    {
+      "epoch": 31.71604938271605,
+      "grad_norm": 3.6962461471557617,
+      "learning_rate": 7.717142857142857e-06,
+      "loss": 0.9305,
+      "step": 1300
+    },
+    {
+      "epoch": 32.32098765432099,
+      "grad_norm": 4.276056289672852,
+      "learning_rate": 7.645714285714286e-06,
+      "loss": 0.9336,
+      "step": 1325
+    },
+    {
+      "epoch": 32.93827160493827,
+      "grad_norm": 5.176277160644531,
+      "learning_rate": 7.574285714285715e-06,
+      "loss": 0.9351,
+      "step": 1350
+    },
+    {
+      "epoch": 33.54320987654321,
+      "grad_norm": 7.2538347244262695,
+      "learning_rate": 7.502857142857144e-06,
+      "loss": 0.9241,
+      "step": 1375
+    },
+    {
+      "epoch": 34.148148148148145,
+      "grad_norm": 4.3576273918151855,
+      "learning_rate": 7.431428571428572e-06,
+      "loss": 0.9316,
+      "step": 1400
+    },
+    {
+      "epoch": 34.76543209876543,
+      "grad_norm": 9.138855934143066,
+      "learning_rate": 7.360000000000001e-06,
+      "loss": 0.9277,
+      "step": 1425
+    },
+    {
+      "epoch": 35.370370370370374,
+      "grad_norm": 4.475003719329834,
+      "learning_rate": 7.28857142857143e-06,
+      "loss": 0.9245,
+      "step": 1450
+    },
+    {
+      "epoch": 35.98765432098765,
+      "grad_norm": 7.28753137588501,
+      "learning_rate": 7.217142857142858e-06,
+      "loss": 0.9266,
+      "step": 1475
+    },
+    {
+      "epoch": 36.592592592592595,
+      "grad_norm": 5.1342949867248535,
+      "learning_rate": 7.145714285714286e-06,
+      "loss": 0.9297,
+      "step": 1500
+    },
+    {
+      "epoch": 37.19753086419753,
+      "grad_norm": 2.7765142917633057,
+      "learning_rate": 7.074285714285715e-06,
+      "loss": 0.9253,
+      "step": 1525
+    },
+    {
+      "epoch": 37.81481481481482,
+      "grad_norm": 3.8011326789855957,
+      "learning_rate": 7.002857142857143e-06,
+      "loss": 0.9203,
+      "step": 1550
+    },
+    {
+      "epoch": 38.41975308641975,
+      "grad_norm": 7.432782173156738,
+      "learning_rate": 6.931428571428572e-06,
+      "loss": 0.9196,
+      "step": 1575
+    },
+    {
+      "epoch": 39.02469135802469,
+      "grad_norm": 4.179474830627441,
+      "learning_rate": 6.860000000000001e-06,
+      "loss": 0.9188,
+      "step": 1600
+    },
+    {
+      "epoch": 39.641975308641975,
+      "grad_norm": 8.513073921203613,
+      "learning_rate": 6.7885714285714286e-06,
+      "loss": 0.9268,
+      "step": 1625
+    },
+    {
+      "epoch": 40.24691358024691,
+      "grad_norm": 3.699882984161377,
+      "learning_rate": 6.7171428571428576e-06,
+      "loss": 0.9216,
+      "step": 1650
+    },
+    {
+      "epoch": 40.864197530864196,
+      "grad_norm": 3.949507713317871,
+      "learning_rate": 6.645714285714287e-06,
+      "loss": 0.9238,
+      "step": 1675
+    },
+    {
+      "epoch": 41.46913580246913,
+      "grad_norm": 3.7951810359954834,
+      "learning_rate": 6.574285714285716e-06,
+      "loss": 0.9198,
+      "step": 1700
+    },
+    {
+      "epoch": 42.074074074074076,
+      "grad_norm": 5.373620986938477,
+      "learning_rate": 6.502857142857143e-06,
+      "loss": 0.9135,
+      "step": 1725
+    },
+    {
+      "epoch": 42.69135802469136,
+      "grad_norm": 6.875067234039307,
+      "learning_rate": 6.431428571428572e-06,
+      "loss": 0.918,
+      "step": 1750
+    },
+    {
+      "epoch": 43.2962962962963,
+      "grad_norm": 7.167726039886475,
+      "learning_rate": 6.360000000000001e-06,
+      "loss": 0.9276,
+      "step": 1775
+    },
+    {
+      "epoch": 43.91358024691358,
+      "grad_norm": 3.7067105770111084,
+      "learning_rate": 6.288571428571429e-06,
+      "loss": 0.9169,
+      "step": 1800
+    },
+    {
+      "epoch": 44.51851851851852,
+      "grad_norm": 4.474793434143066,
+      "learning_rate": 6.217142857142857e-06,
+      "loss": 0.9191,
+      "step": 1825
+    },
+    {
+      "epoch": 45.123456790123456,
+      "grad_norm": 5.386421203613281,
+      "learning_rate": 6.145714285714286e-06,
+      "loss": 0.9145,
+      "step": 1850
+    },
+    {
+      "epoch": 45.74074074074074,
+      "grad_norm": 3.068861246109009,
+      "learning_rate": 6.0742857142857145e-06,
+      "loss": 0.9095,
+      "step": 1875
+    },
+    {
+      "epoch": 46.34567901234568,
+      "grad_norm": 3.804973840713501,
+      "learning_rate": 6.0028571428571435e-06,
+      "loss": 0.912,
+      "step": 1900
+    },
+    {
+      "epoch": 46.96296296296296,
+      "grad_norm": 2.9225473403930664,
+      "learning_rate": 5.9314285714285725e-06,
+      "loss": 0.9049,
+      "step": 1925
+    },
+    {
+      "epoch": 47.5679012345679,
+      "grad_norm": 4.022708892822266,
+      "learning_rate": 5.86e-06,
+      "loss": 0.9049,
+      "step": 1950
+    },
+    {
+      "epoch": 48.17283950617284,
+      "grad_norm": 3.421691417694092,
+      "learning_rate": 5.788571428571429e-06,
+      "loss": 0.9101,
+      "step": 1975
+    },
+    {
+      "epoch": 48.79012345679013,
+      "grad_norm": 6.732350826263428,
+      "learning_rate": 5.717142857142858e-06,
+      "loss": 0.9105,
+      "step": 2000
+    },
+    {
+      "epoch": 48.79012345679013,
+      "eval_loss": 0.8953001499176025,
+      "eval_runtime": 2.1587,
+      "eval_samples_per_second": 66.707,
+      "eval_steps_per_second": 33.353,
+      "step": 2000
+    },
+    {
+      "epoch": 49.39506172839506,
+      "grad_norm": 5.506401538848877,
+      "learning_rate": 5.645714285714287e-06,
+      "loss": 0.9036,
+      "step": 2025
+    },
+    {
+      "epoch": 50.0,
+      "grad_norm": 9.19892406463623,
+      "learning_rate": 5.574285714285714e-06,
+      "loss": 0.9107,
+      "step": 2050
+    },
+    {
+      "epoch": 50.617283950617285,
+      "grad_norm": 3.324119806289673,
+      "learning_rate": 5.502857142857143e-06,
+      "loss": 0.9118,
+      "step": 2075
+    },
+    {
+      "epoch": 51.22222222222222,
+      "grad_norm": 5.142299652099609,
+      "learning_rate": 5.431428571428572e-06,
+      "loss": 0.9098,
+      "step": 2100
+    },
+    {
+      "epoch": 51.839506172839506,
+      "grad_norm": 2.8806934356689453,
+      "learning_rate": 5.36e-06,
+      "loss": 0.9013,
+      "step": 2125
+    },
+    {
+      "epoch": 52.44444444444444,
+      "grad_norm": 4.728231430053711,
+      "learning_rate": 5.2885714285714285e-06,
+      "loss": 0.9049,
+      "step": 2150
+    },
+    {
+      "epoch": 53.04938271604938,
+      "grad_norm": 4.9596991539001465,
+      "learning_rate": 5.2171428571428575e-06,
+      "loss": 0.9128,
+      "step": 2175
+    },
+    {
+      "epoch": 53.666666666666664,
+      "grad_norm": 3.160998821258545,
+      "learning_rate": 5.145714285714286e-06,
+      "loss": 0.9003,
+      "step": 2200
+    },
+    {
+      "epoch": 54.27160493827161,
+      "grad_norm": 3.833195924758911,
+      "learning_rate": 5.074285714285715e-06,
+      "loss": 0.9088,
+      "step": 2225
+    },
+    {
+      "epoch": 54.888888888888886,
+      "grad_norm": 5.242589950561523,
+      "learning_rate": 5.002857142857144e-06,
+      "loss": 0.9005,
+      "step": 2250
+    },
+    {
+      "epoch": 55.49382716049383,
+      "grad_norm": 3.781388759613037,
+      "learning_rate": 4.931428571428572e-06,
+      "loss": 0.9028,
+      "step": 2275
+    },
+    {
+      "epoch": 56.098765432098766,
+      "grad_norm": 6.0595574378967285,
+      "learning_rate": 4.86e-06,
+      "loss": 0.9124,
+      "step": 2300
+    },
+    {
+      "epoch": 56.71604938271605,
+      "grad_norm": 2.7515597343444824,
+      "learning_rate": 4.788571428571429e-06,
+      "loss": 0.9025,
+      "step": 2325
+    },
+    {
+      "epoch": 57.32098765432099,
+      "grad_norm": 6.520521640777588,
+      "learning_rate": 4.717142857142857e-06,
+      "loss": 0.9065,
+      "step": 2350
+    },
+    {
+      "epoch": 57.93827160493827,
+      "grad_norm": 3.289445638656616,
+      "learning_rate": 4.645714285714286e-06,
+      "loss": 0.9004,
+      "step": 2375
+    },
+    {
+      "epoch": 58.54320987654321,
+      "grad_norm": 3.6132805347442627,
+      "learning_rate": 4.574285714285714e-06,
+      "loss": 0.9021,
+      "step": 2400
+    },
+    {
+      "epoch": 59.148148148148145,
+      "grad_norm": 5.021145343780518,
+      "learning_rate": 4.5028571428571434e-06,
+      "loss": 0.8957,
+      "step": 2425
+    },
+    {
+      "epoch": 59.76543209876543,
+      "grad_norm": 5.366466522216797,
+      "learning_rate": 4.431428571428572e-06,
+      "loss": 0.8986,
+      "step": 2450
+    },
+    {
+      "epoch": 60.370370370370374,
+      "grad_norm": 5.833218574523926,
+      "learning_rate": 4.360000000000001e-06,
+      "loss": 0.9045,
+      "step": 2475
+    },
+    {
+      "epoch": 60.98765432098765,
+      "grad_norm": 5.301181793212891,
+      "learning_rate": 4.288571428571429e-06,
+      "loss": 0.8975,
+      "step": 2500
+    },
+    {
+      "epoch": 61.592592592592595,
+      "grad_norm": 3.989539861679077,
+      "learning_rate": 4.217142857142858e-06,
+      "loss": 0.9021,
+      "step": 2525
+    },
+    {
+      "epoch": 62.19753086419753,
+      "grad_norm": 13.111737251281738,
+      "learning_rate": 4.145714285714286e-06,
+      "loss": 0.9043,
+      "step": 2550
+    },
+    {
+      "epoch": 62.81481481481482,
+      "grad_norm": 3.4066903591156006,
+      "learning_rate": 4.074285714285714e-06,
+      "loss": 0.8929,
+      "step": 2575
+    },
+    {
+      "epoch": 63.41975308641975,
+      "grad_norm": 3.9170608520507812,
+      "learning_rate": 4.002857142857143e-06,
+      "loss": 0.8998,
+      "step": 2600
+    },
+    {
+      "epoch": 64.0246913580247,
+      "grad_norm": 3.5934042930603027,
+      "learning_rate": 3.931428571428571e-06,
+      "loss": 0.898,
+      "step": 2625
+    },
+    {
+      "epoch": 64.64197530864197,
+      "grad_norm": 3.3771822452545166,
+      "learning_rate": 3.86e-06,
+      "loss": 0.901,
+      "step": 2650
+    },
+    {
+      "epoch": 65.24691358024691,
+      "grad_norm": 3.5741279125213623,
+      "learning_rate": 3.7885714285714285e-06,
+      "loss": 0.903,
+      "step": 2675
+    },
+    {
+      "epoch": 65.8641975308642,
+      "grad_norm": 4.369333267211914,
+      "learning_rate": 3.7171428571428575e-06,
+      "loss": 0.8907,
+      "step": 2700
+    },
+    {
+      "epoch": 66.46913580246914,
+      "grad_norm": 2.9996423721313477,
+      "learning_rate": 3.6457142857142857e-06,
+      "loss": 0.9008,
+      "step": 2725
+    },
+    {
+      "epoch": 67.07407407407408,
+      "grad_norm": 5.098217487335205,
+      "learning_rate": 3.5742857142857147e-06,
+      "loss": 0.8979,
+      "step": 2750
+    },
+    {
+      "epoch": 67.69135802469135,
+      "grad_norm": 3.8548665046691895,
+      "learning_rate": 3.5028571428571433e-06,
+      "loss": 0.8906,
+      "step": 2775
+    },
+    {
+      "epoch": 68.29629629629629,
+      "grad_norm": 4.787322521209717,
+      "learning_rate": 3.431428571428572e-06,
+      "loss": 0.8949,
+      "step": 2800
+    },
+    {
+      "epoch": 68.91358024691358,
+      "grad_norm": 2.8501498699188232,
+      "learning_rate": 3.3600000000000004e-06,
+      "loss": 0.8932,
+      "step": 2825
+    },
+    {
+      "epoch": 69.51851851851852,
+      "grad_norm": 7.697382926940918,
+      "learning_rate": 3.2885714285714286e-06,
+      "loss": 0.8961,
+      "step": 2850
+    },
+    {
+      "epoch": 70.12345679012346,
+      "grad_norm": 3.5617403984069824,
+      "learning_rate": 3.2171428571428576e-06,
+      "loss": 0.8975,
+      "step": 2875
+    },
+    {
+      "epoch": 70.74074074074075,
+      "grad_norm": 4.286247253417969,
+      "learning_rate": 3.1457142857142858e-06,
+      "loss": 0.8988,
+      "step": 2900
+    },
+    {
+      "epoch": 71.34567901234568,
+      "grad_norm": 3.0174379348754883,
+      "learning_rate": 3.074285714285715e-06,
+      "loss": 0.8986,
+      "step": 2925
+    },
+    {
+      "epoch": 71.96296296296296,
+      "grad_norm": 5.708584308624268,
+      "learning_rate": 3.002857142857143e-06,
+      "loss": 0.8888,
+      "step": 2950
+    },
+    {
+      "epoch": 72.5679012345679,
+      "grad_norm": 7.933815956115723,
+      "learning_rate": 2.9314285714285716e-06,
+      "loss": 0.9,
+      "step": 2975
+    },
+    {
+      "epoch": 73.17283950617283,
+      "grad_norm": 3.4261972904205322,
+      "learning_rate": 2.86e-06,
+      "loss": 0.8951,
+      "step": 3000
+    },
+    {
+      "epoch": 73.17283950617283,
+      "eval_loss": 0.8869494795799255,
+      "eval_runtime": 2.1798,
+      "eval_samples_per_second": 66.061,
+      "eval_steps_per_second": 33.03,
+      "step": 3000
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 4000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 98,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2464002717960960.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-3000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:66d367dc04409d63341644c780ebdb997e8756f9aa9f6d110afc5d9ab8de84be
+size 5905

checkpoint-4000/added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "<ctc_blank>": 80,
+  "<mask>": 79
+}

checkpoint-4000/config.json ADDED Viewed

	@@ -0,0 +1,91 @@

+{
+  "activation_dropout": 0.1,
+  "apply_spec_augment": true,
+  "architectures": [
+    "SpeechT5ForTextToSpeech"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 0,
+  "conv_bias": false,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "decoder_attention_heads": 12,
+  "decoder_ffn_dim": 3072,
+  "decoder_layerdrop": 0.1,
+  "decoder_layers": 6,
+  "decoder_start_token_id": 2,
+  "encoder_attention_heads": 12,
+  "encoder_ffn_dim": 3072,
+  "encoder_layerdrop": 0.1,
+  "encoder_layers": 12,
+  "encoder_max_relative_position": 160,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_norm": "group",
+  "feat_proj_dropout": 0.0,
+  "guided_attention_loss_num_heads": 2,
+  "guided_attention_loss_scale": 10.0,
+  "guided_attention_loss_sigma": 0.4,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "is_encoder_decoder": true,
+  "layer_norm_eps": 1e-05,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "max_length": null,
+  "max_speech_positions": 1876,
+  "max_text_positions": 600,
+  "model_type": "speecht5",
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_mel_bins": 80,
+  "pad_token_id": 1,
+  "positional_dropout": 0.1,
+  "reduction_factor": 2,
+  "scale_embedding": false,
+  "speaker_embedding_dim": 512,
+  "speech_decoder_postnet_dropout": 0.5,
+  "speech_decoder_postnet_kernel": 5,
+  "speech_decoder_postnet_layers": 5,
+  "speech_decoder_postnet_units": 256,
+  "speech_decoder_prenet_dropout": 0.5,
+  "speech_decoder_prenet_layers": 2,
+  "speech_decoder_prenet_units": 256,
+  "torch_dtype": "float32",
+  "transformers_version": "4.55.4",
+  "use_cache": false,
+  "use_guided_attention_loss": true,
+  "vocab_size": 81
+}

checkpoint-4000/generation_config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 0,
+  "decoder_start_token_id": 2,
+  "eos_token_id": 2,
+  "max_length": 1876,
+  "pad_token_id": 1,
+  "transformers_version": "4.55.4"
+}

checkpoint-4000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4b10dd87b217ab2fc492088d02d67c7955fbbff9f22b6fda9133dfa1744e6d9d
+size 577789320

checkpoint-4000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4e3c148661528c4aa2cc3b96d89de7440a524fdfc4c68416d7a8438ea0d22f51
+size 1155777946

checkpoint-4000/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,19 @@

+{
+  "do_normalize": false,
+  "feature_extractor_type": "SpeechT5FeatureExtractor",
+  "feature_size": 1,
+  "fmax": 7600,
+  "fmin": 80,
+  "frame_signal_scale": 1.0,
+  "hop_length": 16,
+  "mel_floor": 1e-10,
+  "num_mel_bins": 80,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "processor_class": "SpeechT5Processor",
+  "reduction_factor": 2,
+  "return_attention_mask": true,
+  "sampling_rate": 16000,
+  "win_function": "hann_window",
+  "win_length": 64
+}

checkpoint-4000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aba3f2e2e55ab9cb538d7b0b1066ff8ea9c9ba098fb7f0715213c6343cb11c11
+size 14645

checkpoint-4000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:700b408dba7ef9825c572f76cd9846e502c0ecd58f44e9e252d68786437bee70
+size 1465

checkpoint-4000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}