diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..26d33521af10bcc7fd8cea344038eaaeb78d0ef5
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
diff --git a/added_tokens.json b/added_tokens.json
new file mode 100644
index 0000000000000000000000000000000000000000..cd5b477a9075c49d99de65622db37bb06a251985
--- /dev/null
+++ b/added_tokens.json
@@ -0,0 +1,4 @@
+{
+  "<ctc_blank>": 80,
+  "<mask>": 79
+}
diff --git a/checkpoint-1000/added_tokens.json b/checkpoint-1000/added_tokens.json
new file mode 100644
index 0000000000000000000000000000000000000000..cd5b477a9075c49d99de65622db37bb06a251985
--- /dev/null
+++ b/checkpoint-1000/added_tokens.json
@@ -0,0 +1,4 @@
+{
+  "<ctc_blank>": 80,
+  "<mask>": 79
+}
diff --git a/checkpoint-1000/config.json b/checkpoint-1000/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..1197099a882c66f2bbab2210646af21b05d05ee4
--- /dev/null
+++ b/checkpoint-1000/config.json
@@ -0,0 +1,91 @@
+{
+  "activation_dropout": 0.1,
+  "apply_spec_augment": true,
+  "architectures": [
+    "SpeechT5ForTextToSpeech"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 0,
+  "conv_bias": false,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "decoder_attention_heads": 12,
+  "decoder_ffn_dim": 3072,
+  "decoder_layerdrop": 0.1,
+  "decoder_layers": 6,
+  "decoder_start_token_id": 2,
+  "encoder_attention_heads": 12,
+  "encoder_ffn_dim": 3072,
+  "encoder_layerdrop": 0.1,
+  "encoder_layers": 12,
+  "encoder_max_relative_position": 160,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_norm": "group",
+  "feat_proj_dropout": 0.0,
+  "guided_attention_loss_num_heads": 2,
+  "guided_attention_loss_scale": 10.0,
+  "guided_attention_loss_sigma": 0.4,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "is_encoder_decoder": true,
+  "layer_norm_eps": 1e-05,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "max_length": null,
+  "max_speech_positions": 1876,
+  "max_text_positions": 600,
+  "model_type": "speecht5",
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_mel_bins": 80,
+  "pad_token_id": 1,
+  "positional_dropout": 0.1,
+  "reduction_factor": 2,
+  "scale_embedding": false,
+  "speaker_embedding_dim": 512,
+  "speech_decoder_postnet_dropout": 0.5,
+  "speech_decoder_postnet_kernel": 5,
+  "speech_decoder_postnet_layers": 5,
+  "speech_decoder_postnet_units": 256,
+  "speech_decoder_prenet_dropout": 0.5,
+  "speech_decoder_prenet_layers": 2,
+  "speech_decoder_prenet_units": 256,
+  "torch_dtype": "float32",
+  "transformers_version": "4.55.4",
+  "use_cache": false,
+  "use_guided_attention_loss": true,
+  "vocab_size": 81
+}
diff --git a/checkpoint-1000/generation_config.json b/checkpoint-1000/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..1748094b7e4b8c1fb66bf2357c44c5f625a36179
--- /dev/null
+++ b/checkpoint-1000/generation_config.json
@@ -0,0 +1,9 @@
+{
+  "_from_model_config": true,
+  "bos_token_id": 0,
+  "decoder_start_token_id": 2,
+  "eos_token_id": 2,
+  "max_length": 1876,
+  "pad_token_id": 1,
+  "transformers_version": "4.55.4"
+}
diff --git a/checkpoint-1000/model.safetensors b/checkpoint-1000/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..745e068dcd86ebc82a19284e74a088cbf4e4f1fc
--- /dev/null
+++ b/checkpoint-1000/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb5d6fe49ff85411787439f9ad2e6bfa7affebb9cb657848d6ca12433db4e10a
+size 577789320
diff --git a/checkpoint-1000/optimizer.pt b/checkpoint-1000/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3f952bd8430e9be7c3e3db31d7ff28a775d8c7f6
--- /dev/null
+++ b/checkpoint-1000/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a801f2d4ec47bf11dfadfa6c068daebd7c9d851603bd0a0eef429e5a22f6bb2e
+size 1155777946
diff --git a/checkpoint-1000/preprocessor_config.json b/checkpoint-1000/preprocessor_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..98a41e66e16f3225a15cb2b51c963fe2213aa273
--- /dev/null
+++ b/checkpoint-1000/preprocessor_config.json
@@ -0,0 +1,19 @@
+{
+  "do_normalize": false,
+  "feature_extractor_type": "SpeechT5FeatureExtractor",
+  "feature_size": 1,
+  "fmax": 7600,
+  "fmin": 80,
+  "frame_signal_scale": 1.0,
+  "hop_length": 16,
+  "mel_floor": 1e-10,
+  "num_mel_bins": 80,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "processor_class": "SpeechT5Processor",
+  "reduction_factor": 2,
+  "return_attention_mask": true,
+  "sampling_rate": 16000,
+  "win_function": "hann_window",
+  "win_length": 64
+}
diff --git a/checkpoint-1000/rng_state.pth b/checkpoint-1000/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..637854d725562baa5365c45dbb6e3e5ac76a576a
--- /dev/null
+++ b/checkpoint-1000/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f27257904c7decb41a03da01a49d9f6fdf1f1b8f5e5d56fe64ef4572336d6eb
+size 14645
diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9744b455ac183d662ae7cb381d958022106980d8
--- /dev/null
+++ b/checkpoint-1000/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5186565a906d7db433e54fbfdb3d62aa206e2cb82464d6a3316608741a692047
+size 1465
diff --git a/checkpoint-1000/special_tokens_map.json b/checkpoint-1000/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..4ee24ec69861cfc94abbe2c8c934aa0744aa623c
--- /dev/null
+++ b/checkpoint-1000/special_tokens_map.json
@@ -0,0 +1,13 @@
+{
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}
diff --git a/checkpoint-1000/spm_char.model b/checkpoint-1000/spm_char.model
new file mode 100644
index 0000000000000000000000000000000000000000..8fb73691942626fa75df80b61aab0e9b9340d8e2
--- /dev/null
+++ b/checkpoint-1000/spm_char.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7fcc48f3e225f627b1641db410ceb0c8649bd2b0c982e150b03f8be3728ab560
+size 238473
diff --git a/checkpoint-1000/tokenizer_config.json b/checkpoint-1000/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..e88d44ba3be31ac8f53461ae7c1b02b4c5c830ab
--- /dev/null
+++ b/checkpoint-1000/tokenizer_config.json
@@ -0,0 +1,64 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "79": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "80": {
+      "content": "<ctc_blank>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "mask_token": "<mask>",
+  "model_max_length": 600,
+  "normalize": false,
+  "pad_token": "<pad>",
+  "processor_class": "SpeechT5Processor",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "SpeechT5Tokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..0e6571fd31c1f1cd220e5e4d8213a56f10851444
--- /dev/null
+++ b/checkpoint-1000/trainer_state.json
@@ -0,0 +1,322 @@
+{
+  "best_global_step": 1000,
+  "best_metric": 0.9205830097198486,
+  "best_model_checkpoint": "runs/emotts_ravdess\\checkpoint-1000",
+  "epoch": 24.395061728395063,
+  "eval_steps": 1000,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.6172839506172839,
+      "grad_norm": 46.678199768066406,
+      "learning_rate": 4.800000000000001e-07,
+      "loss": 3.4472,
+      "step": 25
+    },
+    {
+      "epoch": 1.2222222222222223,
+      "grad_norm": 26.903335571289062,
+      "learning_rate": 9.800000000000001e-07,
+      "loss": 2.9051,
+      "step": 50
+    },
+    {
+      "epoch": 1.8395061728395061,
+      "grad_norm": 16.712799072265625,
+      "learning_rate": 1.48e-06,
+      "loss": 2.2302,
+      "step": 75
+    },
+    {
+      "epoch": 2.4444444444444446,
+      "grad_norm": 11.607951164245605,
+      "learning_rate": 1.98e-06,
+      "loss": 1.7683,
+      "step": 100
+    },
+    {
+      "epoch": 3.049382716049383,
+      "grad_norm": 7.216983318328857,
+      "learning_rate": 2.4800000000000004e-06,
+      "loss": 1.5434,
+      "step": 125
+    },
+    {
+      "epoch": 3.6666666666666665,
+      "grad_norm": 10.899630546569824,
+      "learning_rate": 2.9800000000000003e-06,
+      "loss": 1.4385,
+      "step": 150
+    },
+    {
+      "epoch": 4.271604938271605,
+      "grad_norm": 6.701765537261963,
+      "learning_rate": 3.48e-06,
+      "loss": 1.3262,
+      "step": 175
+    },
+    {
+      "epoch": 4.888888888888889,
+      "grad_norm": 9.419053077697754,
+      "learning_rate": 3.980000000000001e-06,
+      "loss": 1.285,
+      "step": 200
+    },
+    {
+      "epoch": 5.493827160493828,
+      "grad_norm": 5.913278579711914,
+      "learning_rate": 4.48e-06,
+      "loss": 1.2503,
+      "step": 225
+    },
+    {
+      "epoch": 6.098765432098766,
+      "grad_norm": 8.171669006347656,
+      "learning_rate": 4.980000000000001e-06,
+      "loss": 1.1868,
+      "step": 250
+    },
+    {
+      "epoch": 6.716049382716049,
+      "grad_norm": 5.54558801651001,
+      "learning_rate": 5.480000000000001e-06,
+      "loss": 1.1478,
+      "step": 275
+    },
+    {
+      "epoch": 7.320987654320987,
+      "grad_norm": 5.325434684753418,
+      "learning_rate": 5.98e-06,
+      "loss": 1.1245,
+      "step": 300
+    },
+    {
+      "epoch": 7.938271604938271,
+      "grad_norm": 5.406148433685303,
+      "learning_rate": 6.480000000000001e-06,
+      "loss": 1.1145,
+      "step": 325
+    },
+    {
+      "epoch": 8.54320987654321,
+      "grad_norm": 8.461536407470703,
+      "learning_rate": 6.98e-06,
+      "loss": 1.0641,
+      "step": 350
+    },
+    {
+      "epoch": 9.148148148148149,
+      "grad_norm": 3.8533031940460205,
+      "learning_rate": 7.48e-06,
+      "loss": 1.0573,
+      "step": 375
+    },
+    {
+      "epoch": 9.765432098765432,
+      "grad_norm": 7.569976806640625,
+      "learning_rate": 7.980000000000002e-06,
+      "loss": 1.061,
+      "step": 400
+    },
+    {
+      "epoch": 10.37037037037037,
+      "grad_norm": 10.156228065490723,
+      "learning_rate": 8.48e-06,
+      "loss": 1.0485,
+      "step": 425
+    },
+    {
+      "epoch": 10.987654320987655,
+      "grad_norm": 4.668756484985352,
+      "learning_rate": 8.98e-06,
+      "loss": 1.0216,
+      "step": 450
+    },
+    {
+      "epoch": 11.592592592592592,
+      "grad_norm": 5.087125301361084,
+      "learning_rate": 9.48e-06,
+      "loss": 1.0319,
+      "step": 475
+    },
+    {
+      "epoch": 12.197530864197532,
+      "grad_norm": 7.943349361419678,
+      "learning_rate": 9.980000000000001e-06,
+      "loss": 1.0,
+      "step": 500
+    },
+    {
+      "epoch": 12.814814814814815,
+      "grad_norm": 7.655898571014404,
+      "learning_rate": 9.931428571428571e-06,
+      "loss": 1.0052,
+      "step": 525
+    },
+    {
+      "epoch": 13.419753086419753,
+      "grad_norm": 4.458106994628906,
+      "learning_rate": 9.86e-06,
+      "loss": 1.0001,
+      "step": 550
+    },
+    {
+      "epoch": 14.024691358024691,
+      "grad_norm": 9.058222770690918,
+      "learning_rate": 9.78857142857143e-06,
+      "loss": 1.0015,
+      "step": 575
+    },
+    {
+      "epoch": 14.641975308641975,
+      "grad_norm": 4.795205593109131,
+      "learning_rate": 9.717142857142858e-06,
+      "loss": 0.9836,
+      "step": 600
+    },
+    {
+      "epoch": 15.246913580246913,
+      "grad_norm": 10.566876411437988,
+      "learning_rate": 9.645714285714286e-06,
+      "loss": 1.0019,
+      "step": 625
+    },
+    {
+      "epoch": 15.864197530864198,
+      "grad_norm": 7.610626220703125,
+      "learning_rate": 9.574285714285715e-06,
+      "loss": 0.9779,
+      "step": 650
+    },
+    {
+      "epoch": 16.469135802469136,
+      "grad_norm": 6.008159637451172,
+      "learning_rate": 9.502857142857144e-06,
+      "loss": 0.9798,
+      "step": 675
+    },
+    {
+      "epoch": 17.074074074074073,
+      "grad_norm": 6.685286521911621,
+      "learning_rate": 9.431428571428573e-06,
+      "loss": 0.9753,
+      "step": 700
+    },
+    {
+      "epoch": 17.691358024691358,
+      "grad_norm": 2.7540247440338135,
+      "learning_rate": 9.360000000000002e-06,
+      "loss": 0.967,
+      "step": 725
+    },
+    {
+      "epoch": 18.296296296296298,
+      "grad_norm": 4.825072288513184,
+      "learning_rate": 9.28857142857143e-06,
+      "loss": 0.9575,
+      "step": 750
+    },
+    {
+      "epoch": 18.91358024691358,
+      "grad_norm": 6.618119716644287,
+      "learning_rate": 9.217142857142858e-06,
+      "loss": 0.9675,
+      "step": 775
+    },
+    {
+      "epoch": 19.51851851851852,
+      "grad_norm": 5.465808391571045,
+      "learning_rate": 9.145714285714287e-06,
+      "loss": 0.9626,
+      "step": 800
+    },
+    {
+      "epoch": 20.123456790123456,
+      "grad_norm": 4.9501051902771,
+      "learning_rate": 9.074285714285716e-06,
+      "loss": 0.9638,
+      "step": 825
+    },
+    {
+      "epoch": 20.74074074074074,
+      "grad_norm": 4.926831245422363,
+      "learning_rate": 9.002857142857144e-06,
+      "loss": 0.9582,
+      "step": 850
+    },
+    {
+      "epoch": 21.34567901234568,
+      "grad_norm": 6.605464458465576,
+      "learning_rate": 8.931428571428573e-06,
+      "loss": 0.9551,
+      "step": 875
+    },
+    {
+      "epoch": 21.962962962962962,
+      "grad_norm": 5.774538040161133,
+      "learning_rate": 8.860000000000002e-06,
+      "loss": 0.9596,
+      "step": 900
+    },
+    {
+      "epoch": 22.567901234567902,
+      "grad_norm": 4.304802417755127,
+      "learning_rate": 8.788571428571429e-06,
+      "loss": 0.9489,
+      "step": 925
+    },
+    {
+      "epoch": 23.17283950617284,
+      "grad_norm": 5.171604633331299,
+      "learning_rate": 8.717142857142858e-06,
+      "loss": 0.953,
+      "step": 950
+    },
+    {
+      "epoch": 23.790123456790123,
+      "grad_norm": 7.152281761169434,
+      "learning_rate": 8.645714285714287e-06,
+      "loss": 0.9604,
+      "step": 975
+    },
+    {
+      "epoch": 24.395061728395063,
+      "grad_norm": 4.954558849334717,
+      "learning_rate": 8.574285714285714e-06,
+      "loss": 0.9489,
+      "step": 1000
+    },
+    {
+      "epoch": 24.395061728395063,
+      "eval_loss": 0.9205830097198486,
+      "eval_runtime": 2.2708,
+      "eval_samples_per_second": 63.413,
+      "eval_steps_per_second": 31.707,
+      "step": 1000
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 4000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 98,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 821472814356480.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..cf416a3be0d19e0ce5aadbb31f093c5d913fee53
--- /dev/null
+++ b/checkpoint-1000/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66d367dc04409d63341644c780ebdb997e8756f9aa9f6d110afc5d9ab8de84be
+size 5905
diff --git a/checkpoint-2000/added_tokens.json b/checkpoint-2000/added_tokens.json
new file mode 100644
index 0000000000000000000000000000000000000000..cd5b477a9075c49d99de65622db37bb06a251985
--- /dev/null
+++ b/checkpoint-2000/added_tokens.json
@@ -0,0 +1,4 @@
+{
+  "<ctc_blank>": 80,
+  "<mask>": 79
+}
diff --git a/checkpoint-2000/config.json b/checkpoint-2000/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..1197099a882c66f2bbab2210646af21b05d05ee4
--- /dev/null
+++ b/checkpoint-2000/config.json
@@ -0,0 +1,91 @@
+{
+  "activation_dropout": 0.1,
+  "apply_spec_augment": true,
+  "architectures": [
+    "SpeechT5ForTextToSpeech"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 0,
+  "conv_bias": false,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "decoder_attention_heads": 12,
+  "decoder_ffn_dim": 3072,
+  "decoder_layerdrop": 0.1,
+  "decoder_layers": 6,
+  "decoder_start_token_id": 2,
+  "encoder_attention_heads": 12,
+  "encoder_ffn_dim": 3072,
+  "encoder_layerdrop": 0.1,
+  "encoder_layers": 12,
+  "encoder_max_relative_position": 160,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_norm": "group",
+  "feat_proj_dropout": 0.0,
+  "guided_attention_loss_num_heads": 2,
+  "guided_attention_loss_scale": 10.0,
+  "guided_attention_loss_sigma": 0.4,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "is_encoder_decoder": true,
+  "layer_norm_eps": 1e-05,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "max_length": null,
+  "max_speech_positions": 1876,
+  "max_text_positions": 600,
+  "model_type": "speecht5",
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_mel_bins": 80,
+  "pad_token_id": 1,
+  "positional_dropout": 0.1,
+  "reduction_factor": 2,
+  "scale_embedding": false,
+  "speaker_embedding_dim": 512,
+  "speech_decoder_postnet_dropout": 0.5,
+  "speech_decoder_postnet_kernel": 5,
+  "speech_decoder_postnet_layers": 5,
+  "speech_decoder_postnet_units": 256,
+  "speech_decoder_prenet_dropout": 0.5,
+  "speech_decoder_prenet_layers": 2,
+  "speech_decoder_prenet_units": 256,
+  "torch_dtype": "float32",
+  "transformers_version": "4.55.4",
+  "use_cache": false,
+  "use_guided_attention_loss": true,
+  "vocab_size": 81
+}
diff --git a/checkpoint-2000/generation_config.json b/checkpoint-2000/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..1748094b7e4b8c1fb66bf2357c44c5f625a36179
--- /dev/null
+++ b/checkpoint-2000/generation_config.json
@@ -0,0 +1,9 @@
+{
+  "_from_model_config": true,
+  "bos_token_id": 0,
+  "decoder_start_token_id": 2,
+  "eos_token_id": 2,
+  "max_length": 1876,
+  "pad_token_id": 1,
+  "transformers_version": "4.55.4"
+}
diff --git a/checkpoint-2000/model.safetensors b/checkpoint-2000/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..36558fd470cb122c7d43ecb4ac16913b68df0eca
--- /dev/null
+++ b/checkpoint-2000/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:295a846f5d0ead4e65b737b369b8205cd013a02d08d0220b3caa7e8e4b777b77
+size 577789320
diff --git a/checkpoint-2000/optimizer.pt b/checkpoint-2000/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c0ac5d0cb882ff013f3e14e6e98b9a98efe46965
--- /dev/null
+++ b/checkpoint-2000/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2388ca0f503df54eb4d30573ff0fc9814dd98cc0759ae40bf1b7438f984e1ab6
+size 1155777946
diff --git a/checkpoint-2000/preprocessor_config.json b/checkpoint-2000/preprocessor_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..98a41e66e16f3225a15cb2b51c963fe2213aa273
--- /dev/null
+++ b/checkpoint-2000/preprocessor_config.json
@@ -0,0 +1,19 @@
+{
+  "do_normalize": false,
+  "feature_extractor_type": "SpeechT5FeatureExtractor",
+  "feature_size": 1,
+  "fmax": 7600,
+  "fmin": 80,
+  "frame_signal_scale": 1.0,
+  "hop_length": 16,
+  "mel_floor": 1e-10,
+  "num_mel_bins": 80,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "processor_class": "SpeechT5Processor",
+  "reduction_factor": 2,
+  "return_attention_mask": true,
+  "sampling_rate": 16000,
+  "win_function": "hann_window",
+  "win_length": 64
+}
diff --git a/checkpoint-2000/rng_state.pth b/checkpoint-2000/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d1164add54cac59c217275dd520cddcd43877c7f
--- /dev/null
+++ b/checkpoint-2000/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:396a8cc8a565882c2cc697e78085381bcb24a262358918ccaa5445eb5232e231
+size 14645
diff --git a/checkpoint-2000/scheduler.pt b/checkpoint-2000/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f3ce841469cd05c58a347033a571c80874dc9dc9
--- /dev/null
+++ b/checkpoint-2000/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e92941487269a9e704ed42d0796c2eb3245e8d6d83c68a723be04187c99b397
+size 1465
diff --git a/checkpoint-2000/special_tokens_map.json b/checkpoint-2000/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..4ee24ec69861cfc94abbe2c8c934aa0744aa623c
--- /dev/null
+++ b/checkpoint-2000/special_tokens_map.json
@@ -0,0 +1,13 @@
+{
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}
diff --git a/checkpoint-2000/spm_char.model b/checkpoint-2000/spm_char.model
new file mode 100644
index 0000000000000000000000000000000000000000..8fb73691942626fa75df80b61aab0e9b9340d8e2
--- /dev/null
+++ b/checkpoint-2000/spm_char.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7fcc48f3e225f627b1641db410ceb0c8649bd2b0c982e150b03f8be3728ab560
+size 238473
diff --git a/checkpoint-2000/tokenizer_config.json b/checkpoint-2000/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..e88d44ba3be31ac8f53461ae7c1b02b4c5c830ab
--- /dev/null
+++ b/checkpoint-2000/tokenizer_config.json
@@ -0,0 +1,64 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "79": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "80": {
+      "content": "<ctc_blank>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "mask_token": "<mask>",
+  "model_max_length": 600,
+  "normalize": false,
+  "pad_token": "<pad>",
+  "processor_class": "SpeechT5Processor",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "SpeechT5Tokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/checkpoint-2000/trainer_state.json b/checkpoint-2000/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..744adfe7d511646d459a96a97c16626e0e168006
--- /dev/null
+++ b/checkpoint-2000/trainer_state.json
@@ -0,0 +1,610 @@
+{
+  "best_global_step": 2000,
+  "best_metric": 0.8953001499176025,
+  "best_model_checkpoint": "runs/emotts_ravdess\\checkpoint-2000",
+  "epoch": 48.79012345679013,
+  "eval_steps": 1000,
+  "global_step": 2000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.6172839506172839,
+      "grad_norm": 46.678199768066406,
+      "learning_rate": 4.800000000000001e-07,
+      "loss": 3.4472,
+      "step": 25
+    },
+    {
+      "epoch": 1.2222222222222223,
+      "grad_norm": 26.903335571289062,
+      "learning_rate": 9.800000000000001e-07,
+      "loss": 2.9051,
+      "step": 50
+    },
+    {
+      "epoch": 1.8395061728395061,
+      "grad_norm": 16.712799072265625,
+      "learning_rate": 1.48e-06,
+      "loss": 2.2302,
+      "step": 75
+    },
+    {
+      "epoch": 2.4444444444444446,
+      "grad_norm": 11.607951164245605,
+      "learning_rate": 1.98e-06,
+      "loss": 1.7683,
+      "step": 100
+    },
+    {
+      "epoch": 3.049382716049383,
+      "grad_norm": 7.216983318328857,
+      "learning_rate": 2.4800000000000004e-06,
+      "loss": 1.5434,
+      "step": 125
+    },
+    {
+      "epoch": 3.6666666666666665,
+      "grad_norm": 10.899630546569824,
+      "learning_rate": 2.9800000000000003e-06,
+      "loss": 1.4385,
+      "step": 150
+    },
+    {
+      "epoch": 4.271604938271605,
+      "grad_norm": 6.701765537261963,
+      "learning_rate": 3.48e-06,
+      "loss": 1.3262,
+      "step": 175
+    },
+    {
+      "epoch": 4.888888888888889,
+      "grad_norm": 9.419053077697754,
+      "learning_rate": 3.980000000000001e-06,
+      "loss": 1.285,
+      "step": 200
+    },
+    {
+      "epoch": 5.493827160493828,
+      "grad_norm": 5.913278579711914,
+      "learning_rate": 4.48e-06,
+      "loss": 1.2503,
+      "step": 225
+    },
+    {
+      "epoch": 6.098765432098766,
+      "grad_norm": 8.171669006347656,
+      "learning_rate": 4.980000000000001e-06,
+      "loss": 1.1868,
+      "step": 250
+    },
+    {
+      "epoch": 6.716049382716049,
+      "grad_norm": 5.54558801651001,
+      "learning_rate": 5.480000000000001e-06,
+      "loss": 1.1478,
+      "step": 275
+    },
+    {
+      "epoch": 7.320987654320987,
+      "grad_norm": 5.325434684753418,
+      "learning_rate": 5.98e-06,
+      "loss": 1.1245,
+      "step": 300
+    },
+    {
+      "epoch": 7.938271604938271,
+      "grad_norm": 5.406148433685303,
+      "learning_rate": 6.480000000000001e-06,
+      "loss": 1.1145,
+      "step": 325
+    },
+    {
+      "epoch": 8.54320987654321,
+      "grad_norm": 8.461536407470703,
+      "learning_rate": 6.98e-06,
+      "loss": 1.0641,
+      "step": 350
+    },
+    {
+      "epoch": 9.148148148148149,
+      "grad_norm": 3.8533031940460205,
+      "learning_rate": 7.48e-06,
+      "loss": 1.0573,
+      "step": 375
+    },
+    {
+      "epoch": 9.765432098765432,
+      "grad_norm": 7.569976806640625,
+      "learning_rate": 7.980000000000002e-06,
+      "loss": 1.061,
+      "step": 400
+    },
+    {
+      "epoch": 10.37037037037037,
+      "grad_norm": 10.156228065490723,
+      "learning_rate": 8.48e-06,
+      "loss": 1.0485,
+      "step": 425
+    },
+    {
+      "epoch": 10.987654320987655,
+      "grad_norm": 4.668756484985352,
+      "learning_rate": 8.98e-06,
+      "loss": 1.0216,
+      "step": 450
+    },
+    {
+      "epoch": 11.592592592592592,
+      "grad_norm": 5.087125301361084,
+      "learning_rate": 9.48e-06,
+      "loss": 1.0319,
+      "step": 475
+    },
+    {
+      "epoch": 12.197530864197532,
+      "grad_norm": 7.943349361419678,
+      "learning_rate": 9.980000000000001e-06,
+      "loss": 1.0,
+      "step": 500
+    },
+    {
+      "epoch": 12.814814814814815,
+      "grad_norm": 7.655898571014404,
+      "learning_rate": 9.931428571428571e-06,
+      "loss": 1.0052,
+      "step": 525
+    },
+    {
+      "epoch": 13.419753086419753,
+      "grad_norm": 4.458106994628906,
+      "learning_rate": 9.86e-06,
+      "loss": 1.0001,
+      "step": 550
+    },
+    {
+      "epoch": 14.024691358024691,
+      "grad_norm": 9.058222770690918,
+      "learning_rate": 9.78857142857143e-06,
+      "loss": 1.0015,
+      "step": 575
+    },
+    {
+      "epoch": 14.641975308641975,
+      "grad_norm": 4.795205593109131,
+      "learning_rate": 9.717142857142858e-06,
+      "loss": 0.9836,
+      "step": 600
+    },
+    {
+      "epoch": 15.246913580246913,
+      "grad_norm": 10.566876411437988,
+      "learning_rate": 9.645714285714286e-06,
+      "loss": 1.0019,
+      "step": 625
+    },
+    {
+      "epoch": 15.864197530864198,
+      "grad_norm": 7.610626220703125,
+      "learning_rate": 9.574285714285715e-06,
+      "loss": 0.9779,
+      "step": 650
+    },
+    {
+      "epoch": 16.469135802469136,
+      "grad_norm": 6.008159637451172,
+      "learning_rate": 9.502857142857144e-06,
+      "loss": 0.9798,
+      "step": 675
+    },
+    {
+      "epoch": 17.074074074074073,
+      "grad_norm": 6.685286521911621,
+      "learning_rate": 9.431428571428573e-06,
+      "loss": 0.9753,
+      "step": 700
+    },
+    {
+      "epoch": 17.691358024691358,
+      "grad_norm": 2.7540247440338135,
+      "learning_rate": 9.360000000000002e-06,
+      "loss": 0.967,
+      "step": 725
+    },
+    {
+      "epoch": 18.296296296296298,
+      "grad_norm": 4.825072288513184,
+      "learning_rate": 9.28857142857143e-06,
+      "loss": 0.9575,
+      "step": 750
+    },
+    {
+      "epoch": 18.91358024691358,
+      "grad_norm": 6.618119716644287,
+      "learning_rate": 9.217142857142858e-06,
+      "loss": 0.9675,
+      "step": 775
+    },
+    {
+      "epoch": 19.51851851851852,
+      "grad_norm": 5.465808391571045,
+      "learning_rate": 9.145714285714287e-06,
+      "loss": 0.9626,
+      "step": 800
+    },
+    {
+      "epoch": 20.123456790123456,
+      "grad_norm": 4.9501051902771,
+      "learning_rate": 9.074285714285716e-06,
+      "loss": 0.9638,
+      "step": 825
+    },
+    {
+      "epoch": 20.74074074074074,
+      "grad_norm": 4.926831245422363,
+      "learning_rate": 9.002857142857144e-06,
+      "loss": 0.9582,
+      "step": 850
+    },
+    {
+      "epoch": 21.34567901234568,
+      "grad_norm": 6.605464458465576,
+      "learning_rate": 8.931428571428573e-06,
+      "loss": 0.9551,
+      "step": 875
+    },
+    {
+      "epoch": 21.962962962962962,
+      "grad_norm": 5.774538040161133,
+      "learning_rate": 8.860000000000002e-06,
+      "loss": 0.9596,
+      "step": 900
+    },
+    {
+      "epoch": 22.567901234567902,
+      "grad_norm": 4.304802417755127,
+      "learning_rate": 8.788571428571429e-06,
+      "loss": 0.9489,
+      "step": 925
+    },
+    {
+      "epoch": 23.17283950617284,
+      "grad_norm": 5.171604633331299,
+      "learning_rate": 8.717142857142858e-06,
+      "loss": 0.953,
+      "step": 950
+    },
+    {
+      "epoch": 23.790123456790123,
+      "grad_norm": 7.152281761169434,
+      "learning_rate": 8.645714285714287e-06,
+      "loss": 0.9604,
+      "step": 975
+    },
+    {
+      "epoch": 24.395061728395063,
+      "grad_norm": 4.954558849334717,
+      "learning_rate": 8.574285714285714e-06,
+      "loss": 0.9489,
+      "step": 1000
+    },
+    {
+      "epoch": 24.395061728395063,
+      "eval_loss": 0.9205830097198486,
+      "eval_runtime": 2.2708,
+      "eval_samples_per_second": 63.413,
+      "eval_steps_per_second": 31.707,
+      "step": 1000
+    },
+    {
+      "epoch": 25.0,
+      "grad_norm": 10.266937255859375,
+      "learning_rate": 8.502857142857143e-06,
+      "loss": 0.9541,
+      "step": 1025
+    },
+    {
+      "epoch": 25.617283950617285,
+      "grad_norm": 3.225881814956665,
+      "learning_rate": 8.431428571428572e-06,
+      "loss": 0.9451,
+      "step": 1050
+    },
+    {
+      "epoch": 26.22222222222222,
+      "grad_norm": 4.001440048217773,
+      "learning_rate": 8.36e-06,
+      "loss": 0.9422,
+      "step": 1075
+    },
+    {
+      "epoch": 26.839506172839506,
+      "grad_norm": 5.347984313964844,
+      "learning_rate": 8.288571428571429e-06,
+      "loss": 0.9434,
+      "step": 1100
+    },
+    {
+      "epoch": 27.444444444444443,
+      "grad_norm": 4.1566901206970215,
+      "learning_rate": 8.217142857142858e-06,
+      "loss": 0.942,
+      "step": 1125
+    },
+    {
+      "epoch": 28.049382716049383,
+      "grad_norm": 3.2101686000823975,
+      "learning_rate": 8.145714285714287e-06,
+      "loss": 0.9365,
+      "step": 1150
+    },
+    {
+      "epoch": 28.666666666666668,
+      "grad_norm": 5.183631896972656,
+      "learning_rate": 8.074285714285714e-06,
+      "loss": 0.941,
+      "step": 1175
+    },
+    {
+      "epoch": 29.271604938271604,
+      "grad_norm": 4.704529285430908,
+      "learning_rate": 8.002857142857143e-06,
+      "loss": 0.9374,
+      "step": 1200
+    },
+    {
+      "epoch": 29.88888888888889,
+      "grad_norm": 4.460058689117432,
+      "learning_rate": 7.931428571428572e-06,
+      "loss": 0.9383,
+      "step": 1225
+    },
+    {
+      "epoch": 30.493827160493826,
+      "grad_norm": 3.616530418395996,
+      "learning_rate": 7.860000000000001e-06,
+      "loss": 0.9321,
+      "step": 1250
+    },
+    {
+      "epoch": 31.098765432098766,
+      "grad_norm": 3.92207932472229,
+      "learning_rate": 7.788571428571428e-06,
+      "loss": 0.9347,
+      "step": 1275
+    },
+    {
+      "epoch": 31.71604938271605,
+      "grad_norm": 3.6962461471557617,
+      "learning_rate": 7.717142857142857e-06,
+      "loss": 0.9305,
+      "step": 1300
+    },
+    {
+      "epoch": 32.32098765432099,
+      "grad_norm": 4.276056289672852,
+      "learning_rate": 7.645714285714286e-06,
+      "loss": 0.9336,
+      "step": 1325
+    },
+    {
+      "epoch": 32.93827160493827,
+      "grad_norm": 5.176277160644531,
+      "learning_rate": 7.574285714285715e-06,
+      "loss": 0.9351,
+      "step": 1350
+    },
+    {
+      "epoch": 33.54320987654321,
+      "grad_norm": 7.2538347244262695,
+      "learning_rate": 7.502857142857144e-06,
+      "loss": 0.9241,
+      "step": 1375
+    },
+    {
+      "epoch": 34.148148148148145,
+      "grad_norm": 4.3576273918151855,
+      "learning_rate": 7.431428571428572e-06,
+      "loss": 0.9316,
+      "step": 1400
+    },
+    {
+      "epoch": 34.76543209876543,
+      "grad_norm": 9.138855934143066,
+      "learning_rate": 7.360000000000001e-06,
+      "loss": 0.9277,
+      "step": 1425
+    },
+    {
+      "epoch": 35.370370370370374,
+      "grad_norm": 4.475003719329834,
+      "learning_rate": 7.28857142857143e-06,
+      "loss": 0.9245,
+      "step": 1450
+    },
+    {
+      "epoch": 35.98765432098765,
+      "grad_norm": 7.28753137588501,
+      "learning_rate": 7.217142857142858e-06,
+      "loss": 0.9266,
+      "step": 1475
+    },
+    {
+      "epoch": 36.592592592592595,
+      "grad_norm": 5.1342949867248535,
+      "learning_rate": 7.145714285714286e-06,
+      "loss": 0.9297,
+      "step": 1500
+    },
+    {
+      "epoch": 37.19753086419753,
+      "grad_norm": 2.7765142917633057,
+      "learning_rate": 7.074285714285715e-06,
+      "loss": 0.9253,
+      "step": 1525
+    },
+    {
+      "epoch": 37.81481481481482,
+      "grad_norm": 3.8011326789855957,
+      "learning_rate": 7.002857142857143e-06,
+      "loss": 0.9203,
+      "step": 1550
+    },
+    {
+      "epoch": 38.41975308641975,
+      "grad_norm": 7.432782173156738,
+      "learning_rate": 6.931428571428572e-06,
+      "loss": 0.9196,
+      "step": 1575
+    },
+    {
+      "epoch": 39.02469135802469,
+      "grad_norm": 4.179474830627441,
+      "learning_rate": 6.860000000000001e-06,
+      "loss": 0.9188,
+      "step": 1600
+    },
+    {
+      "epoch": 39.641975308641975,
+      "grad_norm": 8.513073921203613,
+      "learning_rate": 6.7885714285714286e-06,
+      "loss": 0.9268,
+      "step": 1625
+    },
+    {
+      "epoch": 40.24691358024691,
+      "grad_norm": 3.699882984161377,
+      "learning_rate": 6.7171428571428576e-06,
+      "loss": 0.9216,
+      "step": 1650
+    },
+    {
+      "epoch": 40.864197530864196,
+      "grad_norm": 3.949507713317871,
+      "learning_rate": 6.645714285714287e-06,
+      "loss": 0.9238,
+      "step": 1675
+    },
+    {
+      "epoch": 41.46913580246913,
+      "grad_norm": 3.7951810359954834,
+      "learning_rate": 6.574285714285716e-06,
+      "loss": 0.9198,
+      "step": 1700
+    },
+    {
+      "epoch": 42.074074074074076,
+      "grad_norm": 5.373620986938477,
+      "learning_rate": 6.502857142857143e-06,
+      "loss": 0.9135,
+      "step": 1725
+    },
+    {
+      "epoch": 42.69135802469136,
+      "grad_norm": 6.875067234039307,
+      "learning_rate": 6.431428571428572e-06,
+      "loss": 0.918,
+      "step": 1750
+    },
+    {
+      "epoch": 43.2962962962963,
+      "grad_norm": 7.167726039886475,
+      "learning_rate": 6.360000000000001e-06,
+      "loss": 0.9276,
+      "step": 1775
+    },
+    {
+      "epoch": 43.91358024691358,
+      "grad_norm": 3.7067105770111084,
+      "learning_rate": 6.288571428571429e-06,
+      "loss": 0.9169,
+      "step": 1800
+    },
+    {
+      "epoch": 44.51851851851852,
+      "grad_norm": 4.474793434143066,
+      "learning_rate": 6.217142857142857e-06,
+      "loss": 0.9191,
+      "step": 1825
+    },
+    {
+      "epoch": 45.123456790123456,
+      "grad_norm": 5.386421203613281,
+      "learning_rate": 6.145714285714286e-06,
+      "loss": 0.9145,
+      "step": 1850
+    },
+    {
+      "epoch": 45.74074074074074,
+      "grad_norm": 3.068861246109009,
+      "learning_rate": 6.0742857142857145e-06,
+      "loss": 0.9095,
+      "step": 1875
+    },
+    {
+      "epoch": 46.34567901234568,
+      "grad_norm": 3.804973840713501,
+      "learning_rate": 6.0028571428571435e-06,
+      "loss": 0.912,
+      "step": 1900
+    },
+    {
+      "epoch": 46.96296296296296,
+      "grad_norm": 2.9225473403930664,
+      "learning_rate": 5.9314285714285725e-06,
+      "loss": 0.9049,
+      "step": 1925
+    },
+    {
+      "epoch": 47.5679012345679,
+      "grad_norm": 4.022708892822266,
+      "learning_rate": 5.86e-06,
+      "loss": 0.9049,
+      "step": 1950
+    },
+    {
+      "epoch": 48.17283950617284,
+      "grad_norm": 3.421691417694092,
+      "learning_rate": 5.788571428571429e-06,
+      "loss": 0.9101,
+      "step": 1975
+    },
+    {
+      "epoch": 48.79012345679013,
+      "grad_norm": 6.732350826263428,
+      "learning_rate": 5.717142857142858e-06,
+      "loss": 0.9105,
+      "step": 2000
+    },
+    {
+      "epoch": 48.79012345679013,
+      "eval_loss": 0.8953001499176025,
+      "eval_runtime": 2.1587,
+      "eval_samples_per_second": 66.707,
+      "eval_steps_per_second": 33.353,
+      "step": 2000
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 4000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 98,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1642945628712960.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-2000/training_args.bin b/checkpoint-2000/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..cf416a3be0d19e0ce5aadbb31f093c5d913fee53
--- /dev/null
+++ b/checkpoint-2000/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66d367dc04409d63341644c780ebdb997e8756f9aa9f6d110afc5d9ab8de84be
+size 5905
diff --git a/checkpoint-3000/added_tokens.json b/checkpoint-3000/added_tokens.json
new file mode 100644
index 0000000000000000000000000000000000000000..cd5b477a9075c49d99de65622db37bb06a251985
--- /dev/null
+++ b/checkpoint-3000/added_tokens.json
@@ -0,0 +1,4 @@
+{
+  "<ctc_blank>": 80,
+  "<mask>": 79
+}
diff --git a/checkpoint-3000/config.json b/checkpoint-3000/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..1197099a882c66f2bbab2210646af21b05d05ee4
--- /dev/null
+++ b/checkpoint-3000/config.json
@@ -0,0 +1,91 @@
+{
+  "activation_dropout": 0.1,
+  "apply_spec_augment": true,
+  "architectures": [
+    "SpeechT5ForTextToSpeech"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 0,
+  "conv_bias": false,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "decoder_attention_heads": 12,
+  "decoder_ffn_dim": 3072,
+  "decoder_layerdrop": 0.1,
+  "decoder_layers": 6,
+  "decoder_start_token_id": 2,
+  "encoder_attention_heads": 12,
+  "encoder_ffn_dim": 3072,
+  "encoder_layerdrop": 0.1,
+  "encoder_layers": 12,
+  "encoder_max_relative_position": 160,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_norm": "group",
+  "feat_proj_dropout": 0.0,
+  "guided_attention_loss_num_heads": 2,
+  "guided_attention_loss_scale": 10.0,
+  "guided_attention_loss_sigma": 0.4,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "is_encoder_decoder": true,
+  "layer_norm_eps": 1e-05,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "max_length": null,
+  "max_speech_positions": 1876,
+  "max_text_positions": 600,
+  "model_type": "speecht5",
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_mel_bins": 80,
+  "pad_token_id": 1,
+  "positional_dropout": 0.1,
+  "reduction_factor": 2,
+  "scale_embedding": false,
+  "speaker_embedding_dim": 512,
+  "speech_decoder_postnet_dropout": 0.5,
+  "speech_decoder_postnet_kernel": 5,
+  "speech_decoder_postnet_layers": 5,
+  "speech_decoder_postnet_units": 256,
+  "speech_decoder_prenet_dropout": 0.5,
+  "speech_decoder_prenet_layers": 2,
+  "speech_decoder_prenet_units": 256,
+  "torch_dtype": "float32",
+  "transformers_version": "4.55.4",
+  "use_cache": false,
+  "use_guided_attention_loss": true,
+  "vocab_size": 81
+}
diff --git a/checkpoint-3000/generation_config.json b/checkpoint-3000/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..1748094b7e4b8c1fb66bf2357c44c5f625a36179
--- /dev/null
+++ b/checkpoint-3000/generation_config.json
@@ -0,0 +1,9 @@
+{
+  "_from_model_config": true,
+  "bos_token_id": 0,
+  "decoder_start_token_id": 2,
+  "eos_token_id": 2,
+  "max_length": 1876,
+  "pad_token_id": 1,
+  "transformers_version": "4.55.4"
+}
diff --git a/checkpoint-3000/model.safetensors b/checkpoint-3000/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..46193a605c14d097bb3430d4d15e1f9cdf6f04fd
--- /dev/null
+++ b/checkpoint-3000/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f6eca9575648e4c7d7eb1ea916fee7b23eafefa0db8bf09a04bd46beac454f2
+size 577789320
diff --git a/checkpoint-3000/optimizer.pt b/checkpoint-3000/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8a2ccc8147a8c1625dea169568604855600b971e
--- /dev/null
+++ b/checkpoint-3000/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8c7d0e8b916fd9a744e0e04850570d8a6297e6bac0767ebd63b53e0cefe4057
+size 1155777946
diff --git a/checkpoint-3000/preprocessor_config.json b/checkpoint-3000/preprocessor_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..98a41e66e16f3225a15cb2b51c963fe2213aa273
--- /dev/null
+++ b/checkpoint-3000/preprocessor_config.json
@@ -0,0 +1,19 @@
+{
+  "do_normalize": false,
+  "feature_extractor_type": "SpeechT5FeatureExtractor",
+  "feature_size": 1,
+  "fmax": 7600,
+  "fmin": 80,
+  "frame_signal_scale": 1.0,
+  "hop_length": 16,
+  "mel_floor": 1e-10,
+  "num_mel_bins": 80,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "processor_class": "SpeechT5Processor",
+  "reduction_factor": 2,
+  "return_attention_mask": true,
+  "sampling_rate": 16000,
+  "win_function": "hann_window",
+  "win_length": 64
+}
diff --git a/checkpoint-3000/rng_state.pth b/checkpoint-3000/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..56c94236a64a8f2889669e23bcaf8a2665536af9
--- /dev/null
+++ b/checkpoint-3000/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4bd7be6ad18d8737c21def51bc146679a3086895043a68047db9ee35a01b64e8
+size 14645
diff --git a/checkpoint-3000/scheduler.pt b/checkpoint-3000/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b03ecef0b0c05f5110fd89496cd4723d841ada76
--- /dev/null
+++ b/checkpoint-3000/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:32cb3a0b1d61782860d37955716f6b5e952b190320ed6c3b93171c974f9325c9
+size 1465
diff --git a/checkpoint-3000/special_tokens_map.json b/checkpoint-3000/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..4ee24ec69861cfc94abbe2c8c934aa0744aa623c
--- /dev/null
+++ b/checkpoint-3000/special_tokens_map.json
@@ -0,0 +1,13 @@
+{
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}
diff --git a/checkpoint-3000/spm_char.model b/checkpoint-3000/spm_char.model
new file mode 100644
index 0000000000000000000000000000000000000000..8fb73691942626fa75df80b61aab0e9b9340d8e2
--- /dev/null
+++ b/checkpoint-3000/spm_char.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7fcc48f3e225f627b1641db410ceb0c8649bd2b0c982e150b03f8be3728ab560
+size 238473
diff --git a/checkpoint-3000/tokenizer_config.json b/checkpoint-3000/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..e88d44ba3be31ac8f53461ae7c1b02b4c5c830ab
--- /dev/null
+++ b/checkpoint-3000/tokenizer_config.json
@@ -0,0 +1,64 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "79": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "80": {
+      "content": "<ctc_blank>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "mask_token": "<mask>",
+  "model_max_length": 600,
+  "normalize": false,
+  "pad_token": "<pad>",
+  "processor_class": "SpeechT5Processor",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "SpeechT5Tokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/checkpoint-3000/trainer_state.json b/checkpoint-3000/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..55636baf3a1f510489b13f061959f3bbd8e03803
--- /dev/null
+++ b/checkpoint-3000/trainer_state.json
@@ -0,0 +1,898 @@
+{
+  "best_global_step": 3000,
+  "best_metric": 0.8869494795799255,
+  "best_model_checkpoint": "runs/emotts_ravdess\\checkpoint-3000",
+  "epoch": 73.17283950617283,
+  "eval_steps": 1000,
+  "global_step": 3000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.6172839506172839,
+      "grad_norm": 46.678199768066406,
+      "learning_rate": 4.800000000000001e-07,
+      "loss": 3.4472,
+      "step": 25
+    },
+    {
+      "epoch": 1.2222222222222223,
+      "grad_norm": 26.903335571289062,
+      "learning_rate": 9.800000000000001e-07,
+      "loss": 2.9051,
+      "step": 50
+    },
+    {
+      "epoch": 1.8395061728395061,
+      "grad_norm": 16.712799072265625,
+      "learning_rate": 1.48e-06,
+      "loss": 2.2302,
+      "step": 75
+    },
+    {
+      "epoch": 2.4444444444444446,
+      "grad_norm": 11.607951164245605,
+      "learning_rate": 1.98e-06,
+      "loss": 1.7683,
+      "step": 100
+    },
+    {
+      "epoch": 3.049382716049383,
+      "grad_norm": 7.216983318328857,
+      "learning_rate": 2.4800000000000004e-06,
+      "loss": 1.5434,
+      "step": 125
+    },
+    {
+      "epoch": 3.6666666666666665,
+      "grad_norm": 10.899630546569824,
+      "learning_rate": 2.9800000000000003e-06,
+      "loss": 1.4385,
+      "step": 150
+    },
+    {
+      "epoch": 4.271604938271605,
+      "grad_norm": 6.701765537261963,
+      "learning_rate": 3.48e-06,
+      "loss": 1.3262,
+      "step": 175
+    },
+    {
+      "epoch": 4.888888888888889,
+      "grad_norm": 9.419053077697754,
+      "learning_rate": 3.980000000000001e-06,
+      "loss": 1.285,
+      "step": 200
+    },
+    {
+      "epoch": 5.493827160493828,
+      "grad_norm": 5.913278579711914,
+      "learning_rate": 4.48e-06,
+      "loss": 1.2503,
+      "step": 225
+    },
+    {
+      "epoch": 6.098765432098766,
+      "grad_norm": 8.171669006347656,
+      "learning_rate": 4.980000000000001e-06,
+      "loss": 1.1868,
+      "step": 250
+    },
+    {
+      "epoch": 6.716049382716049,
+      "grad_norm": 5.54558801651001,
+      "learning_rate": 5.480000000000001e-06,
+      "loss": 1.1478,
+      "step": 275
+    },
+    {
+      "epoch": 7.320987654320987,
+      "grad_norm": 5.325434684753418,
+      "learning_rate": 5.98e-06,
+      "loss": 1.1245,
+      "step": 300
+    },
+    {
+      "epoch": 7.938271604938271,
+      "grad_norm": 5.406148433685303,
+      "learning_rate": 6.480000000000001e-06,
+      "loss": 1.1145,
+      "step": 325
+    },
+    {
+      "epoch": 8.54320987654321,
+      "grad_norm": 8.461536407470703,
+      "learning_rate": 6.98e-06,
+      "loss": 1.0641,
+      "step": 350
+    },
+    {
+      "epoch": 9.148148148148149,
+      "grad_norm": 3.8533031940460205,
+      "learning_rate": 7.48e-06,
+      "loss": 1.0573,
+      "step": 375
+    },
+    {
+      "epoch": 9.765432098765432,
+      "grad_norm": 7.569976806640625,
+      "learning_rate": 7.980000000000002e-06,
+      "loss": 1.061,
+      "step": 400
+    },
+    {
+      "epoch": 10.37037037037037,
+      "grad_norm": 10.156228065490723,
+      "learning_rate": 8.48e-06,
+      "loss": 1.0485,
+      "step": 425
+    },
+    {
+      "epoch": 10.987654320987655,
+      "grad_norm": 4.668756484985352,
+      "learning_rate": 8.98e-06,
+      "loss": 1.0216,
+      "step": 450
+    },
+    {
+      "epoch": 11.592592592592592,
+      "grad_norm": 5.087125301361084,
+      "learning_rate": 9.48e-06,
+      "loss": 1.0319,
+      "step": 475
+    },
+    {
+      "epoch": 12.197530864197532,
+      "grad_norm": 7.943349361419678,
+      "learning_rate": 9.980000000000001e-06,
+      "loss": 1.0,
+      "step": 500
+    },
+    {
+      "epoch": 12.814814814814815,
+      "grad_norm": 7.655898571014404,
+      "learning_rate": 9.931428571428571e-06,
+      "loss": 1.0052,
+      "step": 525
+    },
+    {
+      "epoch": 13.419753086419753,
+      "grad_norm": 4.458106994628906,
+      "learning_rate": 9.86e-06,
+      "loss": 1.0001,
+      "step": 550
+    },
+    {
+      "epoch": 14.024691358024691,
+      "grad_norm": 9.058222770690918,
+      "learning_rate": 9.78857142857143e-06,
+      "loss": 1.0015,
+      "step": 575
+    },
+    {
+      "epoch": 14.641975308641975,
+      "grad_norm": 4.795205593109131,
+      "learning_rate": 9.717142857142858e-06,
+      "loss": 0.9836,
+      "step": 600
+    },
+    {
+      "epoch": 15.246913580246913,
+      "grad_norm": 10.566876411437988,
+      "learning_rate": 9.645714285714286e-06,
+      "loss": 1.0019,
+      "step": 625
+    },
+    {
+      "epoch": 15.864197530864198,
+      "grad_norm": 7.610626220703125,
+      "learning_rate": 9.574285714285715e-06,
+      "loss": 0.9779,
+      "step": 650
+    },
+    {
+      "epoch": 16.469135802469136,
+      "grad_norm": 6.008159637451172,
+      "learning_rate": 9.502857142857144e-06,
+      "loss": 0.9798,
+      "step": 675
+    },
+    {
+      "epoch": 17.074074074074073,
+      "grad_norm": 6.685286521911621,
+      "learning_rate": 9.431428571428573e-06,
+      "loss": 0.9753,
+      "step": 700
+    },
+    {
+      "epoch": 17.691358024691358,
+      "grad_norm": 2.7540247440338135,
+      "learning_rate": 9.360000000000002e-06,
+      "loss": 0.967,
+      "step": 725
+    },
+    {
+      "epoch": 18.296296296296298,
+      "grad_norm": 4.825072288513184,
+      "learning_rate": 9.28857142857143e-06,
+      "loss": 0.9575,
+      "step": 750
+    },
+    {
+      "epoch": 18.91358024691358,
+      "grad_norm": 6.618119716644287,
+      "learning_rate": 9.217142857142858e-06,
+      "loss": 0.9675,
+      "step": 775
+    },
+    {
+      "epoch": 19.51851851851852,
+      "grad_norm": 5.465808391571045,
+      "learning_rate": 9.145714285714287e-06,
+      "loss": 0.9626,
+      "step": 800
+    },
+    {
+      "epoch": 20.123456790123456,
+      "grad_norm": 4.9501051902771,
+      "learning_rate": 9.074285714285716e-06,
+      "loss": 0.9638,
+      "step": 825
+    },
+    {
+      "epoch": 20.74074074074074,
+      "grad_norm": 4.926831245422363,
+      "learning_rate": 9.002857142857144e-06,
+      "loss": 0.9582,
+      "step": 850
+    },
+    {
+      "epoch": 21.34567901234568,
+      "grad_norm": 6.605464458465576,
+      "learning_rate": 8.931428571428573e-06,
+      "loss": 0.9551,
+      "step": 875
+    },
+    {
+      "epoch": 21.962962962962962,
+      "grad_norm": 5.774538040161133,
+      "learning_rate": 8.860000000000002e-06,
+      "loss": 0.9596,
+      "step": 900
+    },
+    {
+      "epoch": 22.567901234567902,
+      "grad_norm": 4.304802417755127,
+      "learning_rate": 8.788571428571429e-06,
+      "loss": 0.9489,
+      "step": 925
+    },
+    {
+      "epoch": 23.17283950617284,
+      "grad_norm": 5.171604633331299,
+      "learning_rate": 8.717142857142858e-06,
+      "loss": 0.953,
+      "step": 950
+    },
+    {
+      "epoch": 23.790123456790123,
+      "grad_norm": 7.152281761169434,
+      "learning_rate": 8.645714285714287e-06,
+      "loss": 0.9604,
+      "step": 975
+    },
+    {
+      "epoch": 24.395061728395063,
+      "grad_norm": 4.954558849334717,
+      "learning_rate": 8.574285714285714e-06,
+      "loss": 0.9489,
+      "step": 1000
+    },
+    {
+      "epoch": 24.395061728395063,
+      "eval_loss": 0.9205830097198486,
+      "eval_runtime": 2.2708,
+      "eval_samples_per_second": 63.413,
+      "eval_steps_per_second": 31.707,
+      "step": 1000
+    },
+    {
+      "epoch": 25.0,
+      "grad_norm": 10.266937255859375,
+      "learning_rate": 8.502857142857143e-06,
+      "loss": 0.9541,
+      "step": 1025
+    },
+    {
+      "epoch": 25.617283950617285,
+      "grad_norm": 3.225881814956665,
+      "learning_rate": 8.431428571428572e-06,
+      "loss": 0.9451,
+      "step": 1050
+    },
+    {
+      "epoch": 26.22222222222222,
+      "grad_norm": 4.001440048217773,
+      "learning_rate": 8.36e-06,
+      "loss": 0.9422,
+      "step": 1075
+    },
+    {
+      "epoch": 26.839506172839506,
+      "grad_norm": 5.347984313964844,
+      "learning_rate": 8.288571428571429e-06,
+      "loss": 0.9434,
+      "step": 1100
+    },
+    {
+      "epoch": 27.444444444444443,
+      "grad_norm": 4.1566901206970215,
+      "learning_rate": 8.217142857142858e-06,
+      "loss": 0.942,
+      "step": 1125
+    },
+    {
+      "epoch": 28.049382716049383,
+      "grad_norm": 3.2101686000823975,
+      "learning_rate": 8.145714285714287e-06,
+      "loss": 0.9365,
+      "step": 1150
+    },
+    {
+      "epoch": 28.666666666666668,
+      "grad_norm": 5.183631896972656,
+      "learning_rate": 8.074285714285714e-06,
+      "loss": 0.941,
+      "step": 1175
+    },
+    {
+      "epoch": 29.271604938271604,
+      "grad_norm": 4.704529285430908,
+      "learning_rate": 8.002857142857143e-06,
+      "loss": 0.9374,
+      "step": 1200
+    },
+    {
+      "epoch": 29.88888888888889,
+      "grad_norm": 4.460058689117432,
+      "learning_rate": 7.931428571428572e-06,
+      "loss": 0.9383,
+      "step": 1225
+    },
+    {
+      "epoch": 30.493827160493826,
+      "grad_norm": 3.616530418395996,
+      "learning_rate": 7.860000000000001e-06,
+      "loss": 0.9321,
+      "step": 1250
+    },
+    {
+      "epoch": 31.098765432098766,
+      "grad_norm": 3.92207932472229,
+      "learning_rate": 7.788571428571428e-06,
+      "loss": 0.9347,
+      "step": 1275
+    },
+    {
+      "epoch": 31.71604938271605,
+      "grad_norm": 3.6962461471557617,
+      "learning_rate": 7.717142857142857e-06,
+      "loss": 0.9305,
+      "step": 1300
+    },
+    {
+      "epoch": 32.32098765432099,
+      "grad_norm": 4.276056289672852,
+      "learning_rate": 7.645714285714286e-06,
+      "loss": 0.9336,
+      "step": 1325
+    },
+    {
+      "epoch": 32.93827160493827,
+      "grad_norm": 5.176277160644531,
+      "learning_rate": 7.574285714285715e-06,
+      "loss": 0.9351,
+      "step": 1350
+    },
+    {
+      "epoch": 33.54320987654321,
+      "grad_norm": 7.2538347244262695,
+      "learning_rate": 7.502857142857144e-06,
+      "loss": 0.9241,
+      "step": 1375
+    },
+    {
+      "epoch": 34.148148148148145,
+      "grad_norm": 4.3576273918151855,
+      "learning_rate": 7.431428571428572e-06,
+      "loss": 0.9316,
+      "step": 1400
+    },
+    {
+      "epoch": 34.76543209876543,
+      "grad_norm": 9.138855934143066,
+      "learning_rate": 7.360000000000001e-06,
+      "loss": 0.9277,
+      "step": 1425
+    },
+    {
+      "epoch": 35.370370370370374,
+      "grad_norm": 4.475003719329834,
+      "learning_rate": 7.28857142857143e-06,
+      "loss": 0.9245,
+      "step": 1450
+    },
+    {
+      "epoch": 35.98765432098765,
+      "grad_norm": 7.28753137588501,
+      "learning_rate": 7.217142857142858e-06,
+      "loss": 0.9266,
+      "step": 1475
+    },
+    {
+      "epoch": 36.592592592592595,
+      "grad_norm": 5.1342949867248535,
+      "learning_rate": 7.145714285714286e-06,
+      "loss": 0.9297,
+      "step": 1500
+    },
+    {
+      "epoch": 37.19753086419753,
+      "grad_norm": 2.7765142917633057,
+      "learning_rate": 7.074285714285715e-06,
+      "loss": 0.9253,
+      "step": 1525
+    },
+    {
+      "epoch": 37.81481481481482,
+      "grad_norm": 3.8011326789855957,
+      "learning_rate": 7.002857142857143e-06,
+      "loss": 0.9203,
+      "step": 1550
+    },
+    {
+      "epoch": 38.41975308641975,
+      "grad_norm": 7.432782173156738,
+      "learning_rate": 6.931428571428572e-06,
+      "loss": 0.9196,
+      "step": 1575
+    },
+    {
+      "epoch": 39.02469135802469,
+      "grad_norm": 4.179474830627441,
+      "learning_rate": 6.860000000000001e-06,
+      "loss": 0.9188,
+      "step": 1600
+    },
+    {
+      "epoch": 39.641975308641975,
+      "grad_norm": 8.513073921203613,
+      "learning_rate": 6.7885714285714286e-06,
+      "loss": 0.9268,
+      "step": 1625
+    },
+    {
+      "epoch": 40.24691358024691,
+      "grad_norm": 3.699882984161377,
+      "learning_rate": 6.7171428571428576e-06,
+      "loss": 0.9216,
+      "step": 1650
+    },
+    {
+      "epoch": 40.864197530864196,
+      "grad_norm": 3.949507713317871,
+      "learning_rate": 6.645714285714287e-06,
+      "loss": 0.9238,
+      "step": 1675
+    },
+    {
+      "epoch": 41.46913580246913,
+      "grad_norm": 3.7951810359954834,
+      "learning_rate": 6.574285714285716e-06,
+      "loss": 0.9198,
+      "step": 1700
+    },
+    {
+      "epoch": 42.074074074074076,
+      "grad_norm": 5.373620986938477,
+      "learning_rate": 6.502857142857143e-06,
+      "loss": 0.9135,
+      "step": 1725
+    },
+    {
+      "epoch": 42.69135802469136,
+      "grad_norm": 6.875067234039307,
+      "learning_rate": 6.431428571428572e-06,
+      "loss": 0.918,
+      "step": 1750
+    },
+    {
+      "epoch": 43.2962962962963,
+      "grad_norm": 7.167726039886475,
+      "learning_rate": 6.360000000000001e-06,
+      "loss": 0.9276,
+      "step": 1775
+    },
+    {
+      "epoch": 43.91358024691358,
+      "grad_norm": 3.7067105770111084,
+      "learning_rate": 6.288571428571429e-06,
+      "loss": 0.9169,
+      "step": 1800
+    },
+    {
+      "epoch": 44.51851851851852,
+      "grad_norm": 4.474793434143066,
+      "learning_rate": 6.217142857142857e-06,
+      "loss": 0.9191,
+      "step": 1825
+    },
+    {
+      "epoch": 45.123456790123456,
+      "grad_norm": 5.386421203613281,
+      "learning_rate": 6.145714285714286e-06,
+      "loss": 0.9145,
+      "step": 1850
+    },
+    {
+      "epoch": 45.74074074074074,
+      "grad_norm": 3.068861246109009,
+      "learning_rate": 6.0742857142857145e-06,
+      "loss": 0.9095,
+      "step": 1875
+    },
+    {
+      "epoch": 46.34567901234568,
+      "grad_norm": 3.804973840713501,
+      "learning_rate": 6.0028571428571435e-06,
+      "loss": 0.912,
+      "step": 1900
+    },
+    {
+      "epoch": 46.96296296296296,
+      "grad_norm": 2.9225473403930664,
+      "learning_rate": 5.9314285714285725e-06,
+      "loss": 0.9049,
+      "step": 1925
+    },
+    {
+      "epoch": 47.5679012345679,
+      "grad_norm": 4.022708892822266,
+      "learning_rate": 5.86e-06,
+      "loss": 0.9049,
+      "step": 1950
+    },
+    {
+      "epoch": 48.17283950617284,
+      "grad_norm": 3.421691417694092,
+      "learning_rate": 5.788571428571429e-06,
+      "loss": 0.9101,
+      "step": 1975
+    },
+    {
+      "epoch": 48.79012345679013,
+      "grad_norm": 6.732350826263428,
+      "learning_rate": 5.717142857142858e-06,
+      "loss": 0.9105,
+      "step": 2000
+    },
+    {
+      "epoch": 48.79012345679013,
+      "eval_loss": 0.8953001499176025,
+      "eval_runtime": 2.1587,
+      "eval_samples_per_second": 66.707,
+      "eval_steps_per_second": 33.353,
+      "step": 2000
+    },
+    {
+      "epoch": 49.39506172839506,
+      "grad_norm": 5.506401538848877,
+      "learning_rate": 5.645714285714287e-06,
+      "loss": 0.9036,
+      "step": 2025
+    },
+    {
+      "epoch": 50.0,
+      "grad_norm": 9.19892406463623,
+      "learning_rate": 5.574285714285714e-06,
+      "loss": 0.9107,
+      "step": 2050
+    },
+    {
+      "epoch": 50.617283950617285,
+      "grad_norm": 3.324119806289673,
+      "learning_rate": 5.502857142857143e-06,
+      "loss": 0.9118,
+      "step": 2075
+    },
+    {
+      "epoch": 51.22222222222222,
+      "grad_norm": 5.142299652099609,
+      "learning_rate": 5.431428571428572e-06,
+      "loss": 0.9098,
+      "step": 2100
+    },
+    {
+      "epoch": 51.839506172839506,
+      "grad_norm": 2.8806934356689453,
+      "learning_rate": 5.36e-06,
+      "loss": 0.9013,
+      "step": 2125
+    },
+    {
+      "epoch": 52.44444444444444,
+      "grad_norm": 4.728231430053711,
+      "learning_rate": 5.2885714285714285e-06,
+      "loss": 0.9049,
+      "step": 2150
+    },
+    {
+      "epoch": 53.04938271604938,
+      "grad_norm": 4.9596991539001465,
+      "learning_rate": 5.2171428571428575e-06,
+      "loss": 0.9128,
+      "step": 2175
+    },
+    {
+      "epoch": 53.666666666666664,
+      "grad_norm": 3.160998821258545,
+      "learning_rate": 5.145714285714286e-06,
+      "loss": 0.9003,
+      "step": 2200
+    },
+    {
+      "epoch": 54.27160493827161,
+      "grad_norm": 3.833195924758911,
+      "learning_rate": 5.074285714285715e-06,
+      "loss": 0.9088,
+      "step": 2225
+    },
+    {
+      "epoch": 54.888888888888886,
+      "grad_norm": 5.242589950561523,
+      "learning_rate": 5.002857142857144e-06,
+      "loss": 0.9005,
+      "step": 2250
+    },
+    {
+      "epoch": 55.49382716049383,
+      "grad_norm": 3.781388759613037,
+      "learning_rate": 4.931428571428572e-06,
+      "loss": 0.9028,
+      "step": 2275
+    },
+    {
+      "epoch": 56.098765432098766,
+      "grad_norm": 6.0595574378967285,
+      "learning_rate": 4.86e-06,
+      "loss": 0.9124,
+      "step": 2300
+    },
+    {
+      "epoch": 56.71604938271605,
+      "grad_norm": 2.7515597343444824,
+      "learning_rate": 4.788571428571429e-06,
+      "loss": 0.9025,
+      "step": 2325
+    },
+    {
+      "epoch": 57.32098765432099,
+      "grad_norm": 6.520521640777588,
+      "learning_rate": 4.717142857142857e-06,
+      "loss": 0.9065,
+      "step": 2350
+    },
+    {
+      "epoch": 57.93827160493827,
+      "grad_norm": 3.289445638656616,
+      "learning_rate": 4.645714285714286e-06,
+      "loss": 0.9004,
+      "step": 2375
+    },
+    {
+      "epoch": 58.54320987654321,
+      "grad_norm": 3.6132805347442627,
+      "learning_rate": 4.574285714285714e-06,
+      "loss": 0.9021,
+      "step": 2400
+    },
+    {
+      "epoch": 59.148148148148145,
+      "grad_norm": 5.021145343780518,
+      "learning_rate": 4.5028571428571434e-06,
+      "loss": 0.8957,
+      "step": 2425
+    },
+    {
+      "epoch": 59.76543209876543,
+      "grad_norm": 5.366466522216797,
+      "learning_rate": 4.431428571428572e-06,
+      "loss": 0.8986,
+      "step": 2450
+    },
+    {
+      "epoch": 60.370370370370374,
+      "grad_norm": 5.833218574523926,
+      "learning_rate": 4.360000000000001e-06,
+      "loss": 0.9045,
+      "step": 2475
+    },
+    {
+      "epoch": 60.98765432098765,
+      "grad_norm": 5.301181793212891,
+      "learning_rate": 4.288571428571429e-06,
+      "loss": 0.8975,
+      "step": 2500
+    },
+    {
+      "epoch": 61.592592592592595,
+      "grad_norm": 3.989539861679077,
+      "learning_rate": 4.217142857142858e-06,
+      "loss": 0.9021,
+      "step": 2525
+    },
+    {
+      "epoch": 62.19753086419753,
+      "grad_norm": 13.111737251281738,
+      "learning_rate": 4.145714285714286e-06,
+      "loss": 0.9043,
+      "step": 2550
+    },
+    {
+      "epoch": 62.81481481481482,
+      "grad_norm": 3.4066903591156006,
+      "learning_rate": 4.074285714285714e-06,
+      "loss": 0.8929,
+      "step": 2575
+    },
+    {
+      "epoch": 63.41975308641975,
+      "grad_norm": 3.9170608520507812,
+      "learning_rate": 4.002857142857143e-06,
+      "loss": 0.8998,
+      "step": 2600
+    },
+    {
+      "epoch": 64.0246913580247,
+      "grad_norm": 3.5934042930603027,
+      "learning_rate": 3.931428571428571e-06,
+      "loss": 0.898,
+      "step": 2625
+    },
+    {
+      "epoch": 64.64197530864197,
+      "grad_norm": 3.3771822452545166,
+      "learning_rate": 3.86e-06,
+      "loss": 0.901,
+      "step": 2650
+    },
+    {
+      "epoch": 65.24691358024691,
+      "grad_norm": 3.5741279125213623,
+      "learning_rate": 3.7885714285714285e-06,
+      "loss": 0.903,
+      "step": 2675
+    },
+    {
+      "epoch": 65.8641975308642,
+      "grad_norm": 4.369333267211914,
+      "learning_rate": 3.7171428571428575e-06,
+      "loss": 0.8907,
+      "step": 2700
+    },
+    {
+      "epoch": 66.46913580246914,
+      "grad_norm": 2.9996423721313477,
+      "learning_rate": 3.6457142857142857e-06,
+      "loss": 0.9008,
+      "step": 2725
+    },
+    {
+      "epoch": 67.07407407407408,
+      "grad_norm": 5.098217487335205,
+      "learning_rate": 3.5742857142857147e-06,
+      "loss": 0.8979,
+      "step": 2750
+    },
+    {
+      "epoch": 67.69135802469135,
+      "grad_norm": 3.8548665046691895,
+      "learning_rate": 3.5028571428571433e-06,
+      "loss": 0.8906,
+      "step": 2775
+    },
+    {
+      "epoch": 68.29629629629629,
+      "grad_norm": 4.787322521209717,
+      "learning_rate": 3.431428571428572e-06,
+      "loss": 0.8949,
+      "step": 2800
+    },
+    {
+      "epoch": 68.91358024691358,
+      "grad_norm": 2.8501498699188232,
+      "learning_rate": 3.3600000000000004e-06,
+      "loss": 0.8932,
+      "step": 2825
+    },
+    {
+      "epoch": 69.51851851851852,
+      "grad_norm": 7.697382926940918,
+      "learning_rate": 3.2885714285714286e-06,
+      "loss": 0.8961,
+      "step": 2850
+    },
+    {
+      "epoch": 70.12345679012346,
+      "grad_norm": 3.5617403984069824,
+      "learning_rate": 3.2171428571428576e-06,
+      "loss": 0.8975,
+      "step": 2875
+    },
+    {
+      "epoch": 70.74074074074075,
+      "grad_norm": 4.286247253417969,
+      "learning_rate": 3.1457142857142858e-06,
+      "loss": 0.8988,
+      "step": 2900
+    },
+    {
+      "epoch": 71.34567901234568,
+      "grad_norm": 3.0174379348754883,
+      "learning_rate": 3.074285714285715e-06,
+      "loss": 0.8986,
+      "step": 2925
+    },
+    {
+      "epoch": 71.96296296296296,
+      "grad_norm": 5.708584308624268,
+      "learning_rate": 3.002857142857143e-06,
+      "loss": 0.8888,
+      "step": 2950
+    },
+    {
+      "epoch": 72.5679012345679,
+      "grad_norm": 7.933815956115723,
+      "learning_rate": 2.9314285714285716e-06,
+      "loss": 0.9,
+      "step": 2975
+    },
+    {
+      "epoch": 73.17283950617283,
+      "grad_norm": 3.4261972904205322,
+      "learning_rate": 2.86e-06,
+      "loss": 0.8951,
+      "step": 3000
+    },
+    {
+      "epoch": 73.17283950617283,
+      "eval_loss": 0.8869494795799255,
+      "eval_runtime": 2.1798,
+      "eval_samples_per_second": 66.061,
+      "eval_steps_per_second": 33.03,
+      "step": 3000
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 4000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 98,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2464002717960960.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-3000/training_args.bin b/checkpoint-3000/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..cf416a3be0d19e0ce5aadbb31f093c5d913fee53
--- /dev/null
+++ b/checkpoint-3000/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66d367dc04409d63341644c780ebdb997e8756f9aa9f6d110afc5d9ab8de84be
+size 5905
diff --git a/checkpoint-4000/added_tokens.json b/checkpoint-4000/added_tokens.json
new file mode 100644
index 0000000000000000000000000000000000000000..cd5b477a9075c49d99de65622db37bb06a251985
--- /dev/null
+++ b/checkpoint-4000/added_tokens.json
@@ -0,0 +1,4 @@
+{
+  "<ctc_blank>": 80,
+  "<mask>": 79
+}
diff --git a/checkpoint-4000/config.json b/checkpoint-4000/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..1197099a882c66f2bbab2210646af21b05d05ee4
--- /dev/null
+++ b/checkpoint-4000/config.json
@@ -0,0 +1,91 @@
+{
+  "activation_dropout": 0.1,
+  "apply_spec_augment": true,
+  "architectures": [
+    "SpeechT5ForTextToSpeech"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 0,
+  "conv_bias": false,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "decoder_attention_heads": 12,
+  "decoder_ffn_dim": 3072,
+  "decoder_layerdrop": 0.1,
+  "decoder_layers": 6,
+  "decoder_start_token_id": 2,
+  "encoder_attention_heads": 12,
+  "encoder_ffn_dim": 3072,
+  "encoder_layerdrop": 0.1,
+  "encoder_layers": 12,
+  "encoder_max_relative_position": 160,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_norm": "group",
+  "feat_proj_dropout": 0.0,
+  "guided_attention_loss_num_heads": 2,
+  "guided_attention_loss_scale": 10.0,
+  "guided_attention_loss_sigma": 0.4,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "is_encoder_decoder": true,
+  "layer_norm_eps": 1e-05,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "max_length": null,
+  "max_speech_positions": 1876,
+  "max_text_positions": 600,
+  "model_type": "speecht5",
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_mel_bins": 80,
+  "pad_token_id": 1,
+  "positional_dropout": 0.1,
+  "reduction_factor": 2,
+  "scale_embedding": false,
+  "speaker_embedding_dim": 512,
+  "speech_decoder_postnet_dropout": 0.5,
+  "speech_decoder_postnet_kernel": 5,
+  "speech_decoder_postnet_layers": 5,
+  "speech_decoder_postnet_units": 256,
+  "speech_decoder_prenet_dropout": 0.5,
+  "speech_decoder_prenet_layers": 2,
+  "speech_decoder_prenet_units": 256,
+  "torch_dtype": "float32",
+  "transformers_version": "4.55.4",
+  "use_cache": false,
+  "use_guided_attention_loss": true,
+  "vocab_size": 81
+}
diff --git a/checkpoint-4000/generation_config.json b/checkpoint-4000/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..1748094b7e4b8c1fb66bf2357c44c5f625a36179
--- /dev/null
+++ b/checkpoint-4000/generation_config.json
@@ -0,0 +1,9 @@
+{
+  "_from_model_config": true,
+  "bos_token_id": 0,
+  "decoder_start_token_id": 2,
+  "eos_token_id": 2,
+  "max_length": 1876,
+  "pad_token_id": 1,
+  "transformers_version": "4.55.4"
+}
diff --git a/checkpoint-4000/model.safetensors b/checkpoint-4000/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..063a7debbe0b96bed7d11b8b1e6151197d55864b
--- /dev/null
+++ b/checkpoint-4000/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b10dd87b217ab2fc492088d02d67c7955fbbff9f22b6fda9133dfa1744e6d9d
+size 577789320
diff --git a/checkpoint-4000/optimizer.pt b/checkpoint-4000/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c03c363ec916b474511c91b3bc2c682ab09127f8
--- /dev/null
+++ b/checkpoint-4000/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e3c148661528c4aa2cc3b96d89de7440a524fdfc4c68416d7a8438ea0d22f51
+size 1155777946
diff --git a/checkpoint-4000/preprocessor_config.json b/checkpoint-4000/preprocessor_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..98a41e66e16f3225a15cb2b51c963fe2213aa273
--- /dev/null
+++ b/checkpoint-4000/preprocessor_config.json
@@ -0,0 +1,19 @@
+{
+  "do_normalize": false,
+  "feature_extractor_type": "SpeechT5FeatureExtractor",
+  "feature_size": 1,
+  "fmax": 7600,
+  "fmin": 80,
+  "frame_signal_scale": 1.0,
+  "hop_length": 16,
+  "mel_floor": 1e-10,
+  "num_mel_bins": 80,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "processor_class": "SpeechT5Processor",
+  "reduction_factor": 2,
+  "return_attention_mask": true,
+  "sampling_rate": 16000,
+  "win_function": "hann_window",
+  "win_length": 64
+}
diff --git a/checkpoint-4000/rng_state.pth b/checkpoint-4000/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4508811a95c544c9c4f67e30fd978a5256727bac
--- /dev/null
+++ b/checkpoint-4000/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aba3f2e2e55ab9cb538d7b0b1066ff8ea9c9ba098fb7f0715213c6343cb11c11
+size 14645
diff --git a/checkpoint-4000/scheduler.pt b/checkpoint-4000/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..36df701f2ca9347d59a8d9d660998e8bb8e1c34e
--- /dev/null
+++ b/checkpoint-4000/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:700b408dba7ef9825c572f76cd9846e502c0ecd58f44e9e252d68786437bee70
+size 1465
diff --git a/checkpoint-4000/special_tokens_map.json b/checkpoint-4000/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..4ee24ec69861cfc94abbe2c8c934aa0744aa623c
--- /dev/null
+++ b/checkpoint-4000/special_tokens_map.json
@@ -0,0 +1,13 @@
+{
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}
diff --git a/checkpoint-4000/spm_char.model b/checkpoint-4000/spm_char.model
new file mode 100644
index 0000000000000000000000000000000000000000..8fb73691942626fa75df80b61aab0e9b9340d8e2
--- /dev/null
+++ b/checkpoint-4000/spm_char.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7fcc48f3e225f627b1641db410ceb0c8649bd2b0c982e150b03f8be3728ab560
+size 238473
diff --git a/checkpoint-4000/tokenizer_config.json b/checkpoint-4000/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..e88d44ba3be31ac8f53461ae7c1b02b4c5c830ab
--- /dev/null
+++ b/checkpoint-4000/tokenizer_config.json
@@ -0,0 +1,64 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "79": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "80": {
+      "content": "<ctc_blank>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "mask_token": "<mask>",
+  "model_max_length": 600,
+  "normalize": false,
+  "pad_token": "<pad>",
+  "processor_class": "SpeechT5Processor",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "SpeechT5Tokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/checkpoint-4000/trainer_state.json b/checkpoint-4000/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..cd5bd0445ad17111b4e6de583e7136aeccb9f362
--- /dev/null
+++ b/checkpoint-4000/trainer_state.json
@@ -0,0 +1,1186 @@
+{
+  "best_global_step": 4000,
+  "best_metric": 0.8817942142486572,
+  "best_model_checkpoint": "runs/emotts_ravdess\\checkpoint-4000",
+  "epoch": 97.5679012345679,
+  "eval_steps": 1000,
+  "global_step": 4000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.6172839506172839,
+      "grad_norm": 46.678199768066406,
+      "learning_rate": 4.800000000000001e-07,
+      "loss": 3.4472,
+      "step": 25
+    },
+    {
+      "epoch": 1.2222222222222223,
+      "grad_norm": 26.903335571289062,
+      "learning_rate": 9.800000000000001e-07,
+      "loss": 2.9051,
+      "step": 50
+    },
+    {
+      "epoch": 1.8395061728395061,
+      "grad_norm": 16.712799072265625,
+      "learning_rate": 1.48e-06,
+      "loss": 2.2302,
+      "step": 75
+    },
+    {
+      "epoch": 2.4444444444444446,
+      "grad_norm": 11.607951164245605,
+      "learning_rate": 1.98e-06,
+      "loss": 1.7683,
+      "step": 100
+    },
+    {
+      "epoch": 3.049382716049383,
+      "grad_norm": 7.216983318328857,
+      "learning_rate": 2.4800000000000004e-06,
+      "loss": 1.5434,
+      "step": 125
+    },
+    {
+      "epoch": 3.6666666666666665,
+      "grad_norm": 10.899630546569824,
+      "learning_rate": 2.9800000000000003e-06,
+      "loss": 1.4385,
+      "step": 150
+    },
+    {
+      "epoch": 4.271604938271605,
+      "grad_norm": 6.701765537261963,
+      "learning_rate": 3.48e-06,
+      "loss": 1.3262,
+      "step": 175
+    },
+    {
+      "epoch": 4.888888888888889,
+      "grad_norm": 9.419053077697754,
+      "learning_rate": 3.980000000000001e-06,
+      "loss": 1.285,
+      "step": 200
+    },
+    {
+      "epoch": 5.493827160493828,
+      "grad_norm": 5.913278579711914,
+      "learning_rate": 4.48e-06,
+      "loss": 1.2503,
+      "step": 225
+    },
+    {
+      "epoch": 6.098765432098766,
+      "grad_norm": 8.171669006347656,
+      "learning_rate": 4.980000000000001e-06,
+      "loss": 1.1868,
+      "step": 250
+    },
+    {
+      "epoch": 6.716049382716049,
+      "grad_norm": 5.54558801651001,
+      "learning_rate": 5.480000000000001e-06,
+      "loss": 1.1478,
+      "step": 275
+    },
+    {
+      "epoch": 7.320987654320987,
+      "grad_norm": 5.325434684753418,
+      "learning_rate": 5.98e-06,
+      "loss": 1.1245,
+      "step": 300
+    },
+    {
+      "epoch": 7.938271604938271,
+      "grad_norm": 5.406148433685303,
+      "learning_rate": 6.480000000000001e-06,
+      "loss": 1.1145,
+      "step": 325
+    },
+    {
+      "epoch": 8.54320987654321,
+      "grad_norm": 8.461536407470703,
+      "learning_rate": 6.98e-06,
+      "loss": 1.0641,
+      "step": 350
+    },
+    {
+      "epoch": 9.148148148148149,
+      "grad_norm": 3.8533031940460205,
+      "learning_rate": 7.48e-06,
+      "loss": 1.0573,
+      "step": 375
+    },
+    {
+      "epoch": 9.765432098765432,
+      "grad_norm": 7.569976806640625,
+      "learning_rate": 7.980000000000002e-06,
+      "loss": 1.061,
+      "step": 400
+    },
+    {
+      "epoch": 10.37037037037037,
+      "grad_norm": 10.156228065490723,
+      "learning_rate": 8.48e-06,
+      "loss": 1.0485,
+      "step": 425
+    },
+    {
+      "epoch": 10.987654320987655,
+      "grad_norm": 4.668756484985352,
+      "learning_rate": 8.98e-06,
+      "loss": 1.0216,
+      "step": 450
+    },
+    {
+      "epoch": 11.592592592592592,
+      "grad_norm": 5.087125301361084,
+      "learning_rate": 9.48e-06,
+      "loss": 1.0319,
+      "step": 475
+    },
+    {
+      "epoch": 12.197530864197532,
+      "grad_norm": 7.943349361419678,
+      "learning_rate": 9.980000000000001e-06,
+      "loss": 1.0,
+      "step": 500
+    },
+    {
+      "epoch": 12.814814814814815,
+      "grad_norm": 7.655898571014404,
+      "learning_rate": 9.931428571428571e-06,
+      "loss": 1.0052,
+      "step": 525
+    },
+    {
+      "epoch": 13.419753086419753,
+      "grad_norm": 4.458106994628906,
+      "learning_rate": 9.86e-06,
+      "loss": 1.0001,
+      "step": 550
+    },
+    {
+      "epoch": 14.024691358024691,
+      "grad_norm": 9.058222770690918,
+      "learning_rate": 9.78857142857143e-06,
+      "loss": 1.0015,
+      "step": 575
+    },
+    {
+      "epoch": 14.641975308641975,
+      "grad_norm": 4.795205593109131,
+      "learning_rate": 9.717142857142858e-06,
+      "loss": 0.9836,
+      "step": 600
+    },
+    {
+      "epoch": 15.246913580246913,
+      "grad_norm": 10.566876411437988,
+      "learning_rate": 9.645714285714286e-06,
+      "loss": 1.0019,
+      "step": 625
+    },
+    {
+      "epoch": 15.864197530864198,
+      "grad_norm": 7.610626220703125,
+      "learning_rate": 9.574285714285715e-06,
+      "loss": 0.9779,
+      "step": 650
+    },
+    {
+      "epoch": 16.469135802469136,
+      "grad_norm": 6.008159637451172,
+      "learning_rate": 9.502857142857144e-06,
+      "loss": 0.9798,
+      "step": 675
+    },
+    {
+      "epoch": 17.074074074074073,
+      "grad_norm": 6.685286521911621,
+      "learning_rate": 9.431428571428573e-06,
+      "loss": 0.9753,
+      "step": 700
+    },
+    {
+      "epoch": 17.691358024691358,
+      "grad_norm": 2.7540247440338135,
+      "learning_rate": 9.360000000000002e-06,
+      "loss": 0.967,
+      "step": 725
+    },
+    {
+      "epoch": 18.296296296296298,
+      "grad_norm": 4.825072288513184,
+      "learning_rate": 9.28857142857143e-06,
+      "loss": 0.9575,
+      "step": 750
+    },
+    {
+      "epoch": 18.91358024691358,
+      "grad_norm": 6.618119716644287,
+      "learning_rate": 9.217142857142858e-06,
+      "loss": 0.9675,
+      "step": 775
+    },
+    {
+      "epoch": 19.51851851851852,
+      "grad_norm": 5.465808391571045,
+      "learning_rate": 9.145714285714287e-06,
+      "loss": 0.9626,
+      "step": 800
+    },
+    {
+      "epoch": 20.123456790123456,
+      "grad_norm": 4.9501051902771,
+      "learning_rate": 9.074285714285716e-06,
+      "loss": 0.9638,
+      "step": 825
+    },
+    {
+      "epoch": 20.74074074074074,
+      "grad_norm": 4.926831245422363,
+      "learning_rate": 9.002857142857144e-06,
+      "loss": 0.9582,
+      "step": 850
+    },
+    {
+      "epoch": 21.34567901234568,
+      "grad_norm": 6.605464458465576,
+      "learning_rate": 8.931428571428573e-06,
+      "loss": 0.9551,
+      "step": 875
+    },
+    {
+      "epoch": 21.962962962962962,
+      "grad_norm": 5.774538040161133,
+      "learning_rate": 8.860000000000002e-06,
+      "loss": 0.9596,
+      "step": 900
+    },
+    {
+      "epoch": 22.567901234567902,
+      "grad_norm": 4.304802417755127,
+      "learning_rate": 8.788571428571429e-06,
+      "loss": 0.9489,
+      "step": 925
+    },
+    {
+      "epoch": 23.17283950617284,
+      "grad_norm": 5.171604633331299,
+      "learning_rate": 8.717142857142858e-06,
+      "loss": 0.953,
+      "step": 950
+    },
+    {
+      "epoch": 23.790123456790123,
+      "grad_norm": 7.152281761169434,
+      "learning_rate": 8.645714285714287e-06,
+      "loss": 0.9604,
+      "step": 975
+    },
+    {
+      "epoch": 24.395061728395063,
+      "grad_norm": 4.954558849334717,
+      "learning_rate": 8.574285714285714e-06,
+      "loss": 0.9489,
+      "step": 1000
+    },
+    {
+      "epoch": 24.395061728395063,
+      "eval_loss": 0.9205830097198486,
+      "eval_runtime": 2.2708,
+      "eval_samples_per_second": 63.413,
+      "eval_steps_per_second": 31.707,
+      "step": 1000
+    },
+    {
+      "epoch": 25.0,
+      "grad_norm": 10.266937255859375,
+      "learning_rate": 8.502857142857143e-06,
+      "loss": 0.9541,
+      "step": 1025
+    },
+    {
+      "epoch": 25.617283950617285,
+      "grad_norm": 3.225881814956665,
+      "learning_rate": 8.431428571428572e-06,
+      "loss": 0.9451,
+      "step": 1050
+    },
+    {
+      "epoch": 26.22222222222222,
+      "grad_norm": 4.001440048217773,
+      "learning_rate": 8.36e-06,
+      "loss": 0.9422,
+      "step": 1075
+    },
+    {
+      "epoch": 26.839506172839506,
+      "grad_norm": 5.347984313964844,
+      "learning_rate": 8.288571428571429e-06,
+      "loss": 0.9434,
+      "step": 1100
+    },
+    {
+      "epoch": 27.444444444444443,
+      "grad_norm": 4.1566901206970215,
+      "learning_rate": 8.217142857142858e-06,
+      "loss": 0.942,
+      "step": 1125
+    },
+    {
+      "epoch": 28.049382716049383,
+      "grad_norm": 3.2101686000823975,
+      "learning_rate": 8.145714285714287e-06,
+      "loss": 0.9365,
+      "step": 1150
+    },
+    {
+      "epoch": 28.666666666666668,
+      "grad_norm": 5.183631896972656,
+      "learning_rate": 8.074285714285714e-06,
+      "loss": 0.941,
+      "step": 1175
+    },
+    {
+      "epoch": 29.271604938271604,
+      "grad_norm": 4.704529285430908,
+      "learning_rate": 8.002857142857143e-06,
+      "loss": 0.9374,
+      "step": 1200
+    },
+    {
+      "epoch": 29.88888888888889,
+      "grad_norm": 4.460058689117432,
+      "learning_rate": 7.931428571428572e-06,
+      "loss": 0.9383,
+      "step": 1225
+    },
+    {
+      "epoch": 30.493827160493826,
+      "grad_norm": 3.616530418395996,
+      "learning_rate": 7.860000000000001e-06,
+      "loss": 0.9321,
+      "step": 1250
+    },
+    {
+      "epoch": 31.098765432098766,
+      "grad_norm": 3.92207932472229,
+      "learning_rate": 7.788571428571428e-06,
+      "loss": 0.9347,
+      "step": 1275
+    },
+    {
+      "epoch": 31.71604938271605,
+      "grad_norm": 3.6962461471557617,
+      "learning_rate": 7.717142857142857e-06,
+      "loss": 0.9305,
+      "step": 1300
+    },
+    {
+      "epoch": 32.32098765432099,
+      "grad_norm": 4.276056289672852,
+      "learning_rate": 7.645714285714286e-06,
+      "loss": 0.9336,
+      "step": 1325
+    },
+    {
+      "epoch": 32.93827160493827,
+      "grad_norm": 5.176277160644531,
+      "learning_rate": 7.574285714285715e-06,
+      "loss": 0.9351,
+      "step": 1350
+    },
+    {
+      "epoch": 33.54320987654321,
+      "grad_norm": 7.2538347244262695,
+      "learning_rate": 7.502857142857144e-06,
+      "loss": 0.9241,
+      "step": 1375
+    },
+    {
+      "epoch": 34.148148148148145,
+      "grad_norm": 4.3576273918151855,
+      "learning_rate": 7.431428571428572e-06,
+      "loss": 0.9316,
+      "step": 1400
+    },
+    {
+      "epoch": 34.76543209876543,
+      "grad_norm": 9.138855934143066,
+      "learning_rate": 7.360000000000001e-06,
+      "loss": 0.9277,
+      "step": 1425
+    },
+    {
+      "epoch": 35.370370370370374,
+      "grad_norm": 4.475003719329834,
+      "learning_rate": 7.28857142857143e-06,
+      "loss": 0.9245,
+      "step": 1450
+    },
+    {
+      "epoch": 35.98765432098765,
+      "grad_norm": 7.28753137588501,
+      "learning_rate": 7.217142857142858e-06,
+      "loss": 0.9266,
+      "step": 1475
+    },
+    {
+      "epoch": 36.592592592592595,
+      "grad_norm": 5.1342949867248535,
+      "learning_rate": 7.145714285714286e-06,
+      "loss": 0.9297,
+      "step": 1500
+    },
+    {
+      "epoch": 37.19753086419753,
+      "grad_norm": 2.7765142917633057,
+      "learning_rate": 7.074285714285715e-06,
+      "loss": 0.9253,
+      "step": 1525
+    },
+    {
+      "epoch": 37.81481481481482,
+      "grad_norm": 3.8011326789855957,
+      "learning_rate": 7.002857142857143e-06,
+      "loss": 0.9203,
+      "step": 1550
+    },
+    {
+      "epoch": 38.41975308641975,
+      "grad_norm": 7.432782173156738,
+      "learning_rate": 6.931428571428572e-06,
+      "loss": 0.9196,
+      "step": 1575
+    },
+    {
+      "epoch": 39.02469135802469,
+      "grad_norm": 4.179474830627441,
+      "learning_rate": 6.860000000000001e-06,
+      "loss": 0.9188,
+      "step": 1600
+    },
+    {
+      "epoch": 39.641975308641975,
+      "grad_norm": 8.513073921203613,
+      "learning_rate": 6.7885714285714286e-06,
+      "loss": 0.9268,
+      "step": 1625
+    },
+    {
+      "epoch": 40.24691358024691,
+      "grad_norm": 3.699882984161377,
+      "learning_rate": 6.7171428571428576e-06,
+      "loss": 0.9216,
+      "step": 1650
+    },
+    {
+      "epoch": 40.864197530864196,
+      "grad_norm": 3.949507713317871,
+      "learning_rate": 6.645714285714287e-06,
+      "loss": 0.9238,
+      "step": 1675
+    },
+    {
+      "epoch": 41.46913580246913,
+      "grad_norm": 3.7951810359954834,
+      "learning_rate": 6.574285714285716e-06,
+      "loss": 0.9198,
+      "step": 1700
+    },
+    {
+      "epoch": 42.074074074074076,
+      "grad_norm": 5.373620986938477,
+      "learning_rate": 6.502857142857143e-06,
+      "loss": 0.9135,
+      "step": 1725
+    },
+    {
+      "epoch": 42.69135802469136,
+      "grad_norm": 6.875067234039307,
+      "learning_rate": 6.431428571428572e-06,
+      "loss": 0.918,
+      "step": 1750
+    },
+    {
+      "epoch": 43.2962962962963,
+      "grad_norm": 7.167726039886475,
+      "learning_rate": 6.360000000000001e-06,
+      "loss": 0.9276,
+      "step": 1775
+    },
+    {
+      "epoch": 43.91358024691358,
+      "grad_norm": 3.7067105770111084,
+      "learning_rate": 6.288571428571429e-06,
+      "loss": 0.9169,
+      "step": 1800
+    },
+    {
+      "epoch": 44.51851851851852,
+      "grad_norm": 4.474793434143066,
+      "learning_rate": 6.217142857142857e-06,
+      "loss": 0.9191,
+      "step": 1825
+    },
+    {
+      "epoch": 45.123456790123456,
+      "grad_norm": 5.386421203613281,
+      "learning_rate": 6.145714285714286e-06,
+      "loss": 0.9145,
+      "step": 1850
+    },
+    {
+      "epoch": 45.74074074074074,
+      "grad_norm": 3.068861246109009,
+      "learning_rate": 6.0742857142857145e-06,
+      "loss": 0.9095,
+      "step": 1875
+    },
+    {
+      "epoch": 46.34567901234568,
+      "grad_norm": 3.804973840713501,
+      "learning_rate": 6.0028571428571435e-06,
+      "loss": 0.912,
+      "step": 1900
+    },
+    {
+      "epoch": 46.96296296296296,
+      "grad_norm": 2.9225473403930664,
+      "learning_rate": 5.9314285714285725e-06,
+      "loss": 0.9049,
+      "step": 1925
+    },
+    {
+      "epoch": 47.5679012345679,
+      "grad_norm": 4.022708892822266,
+      "learning_rate": 5.86e-06,
+      "loss": 0.9049,
+      "step": 1950
+    },
+    {
+      "epoch": 48.17283950617284,
+      "grad_norm": 3.421691417694092,
+      "learning_rate": 5.788571428571429e-06,
+      "loss": 0.9101,
+      "step": 1975
+    },
+    {
+      "epoch": 48.79012345679013,
+      "grad_norm": 6.732350826263428,
+      "learning_rate": 5.717142857142858e-06,
+      "loss": 0.9105,
+      "step": 2000
+    },
+    {
+      "epoch": 48.79012345679013,
+      "eval_loss": 0.8953001499176025,
+      "eval_runtime": 2.1587,
+      "eval_samples_per_second": 66.707,
+      "eval_steps_per_second": 33.353,
+      "step": 2000
+    },
+    {
+      "epoch": 49.39506172839506,
+      "grad_norm": 5.506401538848877,
+      "learning_rate": 5.645714285714287e-06,
+      "loss": 0.9036,
+      "step": 2025
+    },
+    {
+      "epoch": 50.0,
+      "grad_norm": 9.19892406463623,
+      "learning_rate": 5.574285714285714e-06,
+      "loss": 0.9107,
+      "step": 2050
+    },
+    {
+      "epoch": 50.617283950617285,
+      "grad_norm": 3.324119806289673,
+      "learning_rate": 5.502857142857143e-06,
+      "loss": 0.9118,
+      "step": 2075
+    },
+    {
+      "epoch": 51.22222222222222,
+      "grad_norm": 5.142299652099609,
+      "learning_rate": 5.431428571428572e-06,
+      "loss": 0.9098,
+      "step": 2100
+    },
+    {
+      "epoch": 51.839506172839506,
+      "grad_norm": 2.8806934356689453,
+      "learning_rate": 5.36e-06,
+      "loss": 0.9013,
+      "step": 2125
+    },
+    {
+      "epoch": 52.44444444444444,
+      "grad_norm": 4.728231430053711,
+      "learning_rate": 5.2885714285714285e-06,
+      "loss": 0.9049,
+      "step": 2150
+    },
+    {
+      "epoch": 53.04938271604938,
+      "grad_norm": 4.9596991539001465,
+      "learning_rate": 5.2171428571428575e-06,
+      "loss": 0.9128,
+      "step": 2175
+    },
+    {
+      "epoch": 53.666666666666664,
+      "grad_norm": 3.160998821258545,
+      "learning_rate": 5.145714285714286e-06,
+      "loss": 0.9003,
+      "step": 2200
+    },
+    {
+      "epoch": 54.27160493827161,
+      "grad_norm": 3.833195924758911,
+      "learning_rate": 5.074285714285715e-06,
+      "loss": 0.9088,
+      "step": 2225
+    },
+    {
+      "epoch": 54.888888888888886,
+      "grad_norm": 5.242589950561523,
+      "learning_rate": 5.002857142857144e-06,
+      "loss": 0.9005,
+      "step": 2250
+    },
+    {
+      "epoch": 55.49382716049383,
+      "grad_norm": 3.781388759613037,
+      "learning_rate": 4.931428571428572e-06,
+      "loss": 0.9028,
+      "step": 2275
+    },
+    {
+      "epoch": 56.098765432098766,
+      "grad_norm": 6.0595574378967285,
+      "learning_rate": 4.86e-06,
+      "loss": 0.9124,
+      "step": 2300
+    },
+    {
+      "epoch": 56.71604938271605,
+      "grad_norm": 2.7515597343444824,
+      "learning_rate": 4.788571428571429e-06,
+      "loss": 0.9025,
+      "step": 2325
+    },
+    {
+      "epoch": 57.32098765432099,
+      "grad_norm": 6.520521640777588,
+      "learning_rate": 4.717142857142857e-06,
+      "loss": 0.9065,
+      "step": 2350
+    },
+    {
+      "epoch": 57.93827160493827,
+      "grad_norm": 3.289445638656616,
+      "learning_rate": 4.645714285714286e-06,
+      "loss": 0.9004,
+      "step": 2375
+    },
+    {
+      "epoch": 58.54320987654321,
+      "grad_norm": 3.6132805347442627,
+      "learning_rate": 4.574285714285714e-06,
+      "loss": 0.9021,
+      "step": 2400
+    },
+    {
+      "epoch": 59.148148148148145,
+      "grad_norm": 5.021145343780518,
+      "learning_rate": 4.5028571428571434e-06,
+      "loss": 0.8957,
+      "step": 2425
+    },
+    {
+      "epoch": 59.76543209876543,
+      "grad_norm": 5.366466522216797,
+      "learning_rate": 4.431428571428572e-06,
+      "loss": 0.8986,
+      "step": 2450
+    },
+    {
+      "epoch": 60.370370370370374,
+      "grad_norm": 5.833218574523926,
+      "learning_rate": 4.360000000000001e-06,
+      "loss": 0.9045,
+      "step": 2475
+    },
+    {
+      "epoch": 60.98765432098765,
+      "grad_norm": 5.301181793212891,
+      "learning_rate": 4.288571428571429e-06,
+      "loss": 0.8975,
+      "step": 2500
+    },
+    {
+      "epoch": 61.592592592592595,
+      "grad_norm": 3.989539861679077,
+      "learning_rate": 4.217142857142858e-06,
+      "loss": 0.9021,
+      "step": 2525
+    },
+    {
+      "epoch": 62.19753086419753,
+      "grad_norm": 13.111737251281738,
+      "learning_rate": 4.145714285714286e-06,
+      "loss": 0.9043,
+      "step": 2550
+    },
+    {
+      "epoch": 62.81481481481482,
+      "grad_norm": 3.4066903591156006,
+      "learning_rate": 4.074285714285714e-06,
+      "loss": 0.8929,
+      "step": 2575
+    },
+    {
+      "epoch": 63.41975308641975,
+      "grad_norm": 3.9170608520507812,
+      "learning_rate": 4.002857142857143e-06,
+      "loss": 0.8998,
+      "step": 2600
+    },
+    {
+      "epoch": 64.0246913580247,
+      "grad_norm": 3.5934042930603027,
+      "learning_rate": 3.931428571428571e-06,
+      "loss": 0.898,
+      "step": 2625
+    },
+    {
+      "epoch": 64.64197530864197,
+      "grad_norm": 3.3771822452545166,
+      "learning_rate": 3.86e-06,
+      "loss": 0.901,
+      "step": 2650
+    },
+    {
+      "epoch": 65.24691358024691,
+      "grad_norm": 3.5741279125213623,
+      "learning_rate": 3.7885714285714285e-06,
+      "loss": 0.903,
+      "step": 2675
+    },
+    {
+      "epoch": 65.8641975308642,
+      "grad_norm": 4.369333267211914,
+      "learning_rate": 3.7171428571428575e-06,
+      "loss": 0.8907,
+      "step": 2700
+    },
+    {
+      "epoch": 66.46913580246914,
+      "grad_norm": 2.9996423721313477,
+      "learning_rate": 3.6457142857142857e-06,
+      "loss": 0.9008,
+      "step": 2725
+    },
+    {
+      "epoch": 67.07407407407408,
+      "grad_norm": 5.098217487335205,
+      "learning_rate": 3.5742857142857147e-06,
+      "loss": 0.8979,
+      "step": 2750
+    },
+    {
+      "epoch": 67.69135802469135,
+      "grad_norm": 3.8548665046691895,
+      "learning_rate": 3.5028571428571433e-06,
+      "loss": 0.8906,
+      "step": 2775
+    },
+    {
+      "epoch": 68.29629629629629,
+      "grad_norm": 4.787322521209717,
+      "learning_rate": 3.431428571428572e-06,
+      "loss": 0.8949,
+      "step": 2800
+    },
+    {
+      "epoch": 68.91358024691358,
+      "grad_norm": 2.8501498699188232,
+      "learning_rate": 3.3600000000000004e-06,
+      "loss": 0.8932,
+      "step": 2825
+    },
+    {
+      "epoch": 69.51851851851852,
+      "grad_norm": 7.697382926940918,
+      "learning_rate": 3.2885714285714286e-06,
+      "loss": 0.8961,
+      "step": 2850
+    },
+    {
+      "epoch": 70.12345679012346,
+      "grad_norm": 3.5617403984069824,
+      "learning_rate": 3.2171428571428576e-06,
+      "loss": 0.8975,
+      "step": 2875
+    },
+    {
+      "epoch": 70.74074074074075,
+      "grad_norm": 4.286247253417969,
+      "learning_rate": 3.1457142857142858e-06,
+      "loss": 0.8988,
+      "step": 2900
+    },
+    {
+      "epoch": 71.34567901234568,
+      "grad_norm": 3.0174379348754883,
+      "learning_rate": 3.074285714285715e-06,
+      "loss": 0.8986,
+      "step": 2925
+    },
+    {
+      "epoch": 71.96296296296296,
+      "grad_norm": 5.708584308624268,
+      "learning_rate": 3.002857142857143e-06,
+      "loss": 0.8888,
+      "step": 2950
+    },
+    {
+      "epoch": 72.5679012345679,
+      "grad_norm": 7.933815956115723,
+      "learning_rate": 2.9314285714285716e-06,
+      "loss": 0.9,
+      "step": 2975
+    },
+    {
+      "epoch": 73.17283950617283,
+      "grad_norm": 3.4261972904205322,
+      "learning_rate": 2.86e-06,
+      "loss": 0.8951,
+      "step": 3000
+    },
+    {
+      "epoch": 73.17283950617283,
+      "eval_loss": 0.8869494795799255,
+      "eval_runtime": 2.1798,
+      "eval_samples_per_second": 66.061,
+      "eval_steps_per_second": 33.03,
+      "step": 3000
+    },
+    {
+      "epoch": 73.79012345679013,
+      "grad_norm": 4.3120646476745605,
+      "learning_rate": 2.7885714285714287e-06,
+      "loss": 0.8897,
+      "step": 3025
+    },
+    {
+      "epoch": 74.39506172839506,
+      "grad_norm": 3.6650469303131104,
+      "learning_rate": 2.7171428571428577e-06,
+      "loss": 0.8961,
+      "step": 3050
+    },
+    {
+      "epoch": 75.0,
+      "grad_norm": 7.670346736907959,
+      "learning_rate": 2.645714285714286e-06,
+      "loss": 0.9003,
+      "step": 3075
+    },
+    {
+      "epoch": 75.61728395061728,
+      "grad_norm": 3.292160987854004,
+      "learning_rate": 2.574285714285715e-06,
+      "loss": 0.8946,
+      "step": 3100
+    },
+    {
+      "epoch": 76.22222222222223,
+      "grad_norm": 3.5280263423919678,
+      "learning_rate": 2.502857142857143e-06,
+      "loss": 0.8934,
+      "step": 3125
+    },
+    {
+      "epoch": 76.8395061728395,
+      "grad_norm": 5.340327739715576,
+      "learning_rate": 2.4314285714285717e-06,
+      "loss": 0.8984,
+      "step": 3150
+    },
+    {
+      "epoch": 77.44444444444444,
+      "grad_norm": 6.106954574584961,
+      "learning_rate": 2.3600000000000003e-06,
+      "loss": 0.8958,
+      "step": 3175
+    },
+    {
+      "epoch": 78.04938271604938,
+      "grad_norm": 3.5689122676849365,
+      "learning_rate": 2.288571428571429e-06,
+      "loss": 0.8968,
+      "step": 3200
+    },
+    {
+      "epoch": 78.66666666666667,
+      "grad_norm": 3.928802967071533,
+      "learning_rate": 2.2171428571428575e-06,
+      "loss": 0.8909,
+      "step": 3225
+    },
+    {
+      "epoch": 79.27160493827161,
+      "grad_norm": 3.5558717250823975,
+      "learning_rate": 2.145714285714286e-06,
+      "loss": 0.8881,
+      "step": 3250
+    },
+    {
+      "epoch": 79.88888888888889,
+      "grad_norm": 3.194141387939453,
+      "learning_rate": 2.0742857142857146e-06,
+      "loss": 0.8868,
+      "step": 3275
+    },
+    {
+      "epoch": 80.49382716049382,
+      "grad_norm": 5.6881232261657715,
+      "learning_rate": 2.0028571428571432e-06,
+      "loss": 0.8973,
+      "step": 3300
+    },
+    {
+      "epoch": 81.09876543209876,
+      "grad_norm": 3.105429172515869,
+      "learning_rate": 1.9314285714285714e-06,
+      "loss": 0.8935,
+      "step": 3325
+    },
+    {
+      "epoch": 81.71604938271605,
+      "grad_norm": 3.2020113468170166,
+      "learning_rate": 1.8600000000000002e-06,
+      "loss": 0.89,
+      "step": 3350
+    },
+    {
+      "epoch": 82.32098765432099,
+      "grad_norm": 3.5079753398895264,
+      "learning_rate": 1.7885714285714288e-06,
+      "loss": 0.8957,
+      "step": 3375
+    },
+    {
+      "epoch": 82.93827160493827,
+      "grad_norm": 3.3880198001861572,
+      "learning_rate": 1.7171428571428572e-06,
+      "loss": 0.8914,
+      "step": 3400
+    },
+    {
+      "epoch": 83.54320987654322,
+      "grad_norm": 6.072048664093018,
+      "learning_rate": 1.6457142857142857e-06,
+      "loss": 0.8904,
+      "step": 3425
+    },
+    {
+      "epoch": 84.14814814814815,
+      "grad_norm": 2.919877529144287,
+      "learning_rate": 1.5742857142857143e-06,
+      "loss": 0.8842,
+      "step": 3450
+    },
+    {
+      "epoch": 84.76543209876543,
+      "grad_norm": 3.742579936981201,
+      "learning_rate": 1.502857142857143e-06,
+      "loss": 0.8903,
+      "step": 3475
+    },
+    {
+      "epoch": 85.37037037037037,
+      "grad_norm": 3.9216341972351074,
+      "learning_rate": 1.4314285714285717e-06,
+      "loss": 0.8962,
+      "step": 3500
+    },
+    {
+      "epoch": 85.98765432098766,
+      "grad_norm": 3.594411849975586,
+      "learning_rate": 1.3600000000000001e-06,
+      "loss": 0.894,
+      "step": 3525
+    },
+    {
+      "epoch": 86.5925925925926,
+      "grad_norm": 3.7163913249969482,
+      "learning_rate": 1.2885714285714287e-06,
+      "loss": 0.8931,
+      "step": 3550
+    },
+    {
+      "epoch": 87.19753086419753,
+      "grad_norm": 2.8378684520721436,
+      "learning_rate": 1.2171428571428573e-06,
+      "loss": 0.8855,
+      "step": 3575
+    },
+    {
+      "epoch": 87.81481481481481,
+      "grad_norm": 3.5566790103912354,
+      "learning_rate": 1.1457142857142859e-06,
+      "loss": 0.8911,
+      "step": 3600
+    },
+    {
+      "epoch": 88.41975308641975,
+      "grad_norm": 3.187382936477661,
+      "learning_rate": 1.0742857142857145e-06,
+      "loss": 0.8979,
+      "step": 3625
+    },
+    {
+      "epoch": 89.0246913580247,
+      "grad_norm": 3.7930212020874023,
+      "learning_rate": 1.0028571428571428e-06,
+      "loss": 0.8824,
+      "step": 3650
+    },
+    {
+      "epoch": 89.64197530864197,
+      "grad_norm": 3.2194180488586426,
+      "learning_rate": 9.314285714285714e-07,
+      "loss": 0.8915,
+      "step": 3675
+    },
+    {
+      "epoch": 90.24691358024691,
+      "grad_norm": 3.050337076187134,
+      "learning_rate": 8.6e-07,
+      "loss": 0.8866,
+      "step": 3700
+    },
+    {
+      "epoch": 90.8641975308642,
+      "grad_norm": 5.006812572479248,
+      "learning_rate": 7.885714285714287e-07,
+      "loss": 0.8877,
+      "step": 3725
+    },
+    {
+      "epoch": 91.46913580246914,
+      "grad_norm": 3.206684112548828,
+      "learning_rate": 7.171428571428572e-07,
+      "loss": 0.8894,
+      "step": 3750
+    },
+    {
+      "epoch": 92.07407407407408,
+      "grad_norm": 4.174693584442139,
+      "learning_rate": 6.457142857142858e-07,
+      "loss": 0.8906,
+      "step": 3775
+    },
+    {
+      "epoch": 92.69135802469135,
+      "grad_norm": 5.580083847045898,
+      "learning_rate": 5.742857142857143e-07,
+      "loss": 0.8962,
+      "step": 3800
+    },
+    {
+      "epoch": 93.29629629629629,
+      "grad_norm": 4.221833229064941,
+      "learning_rate": 5.028571428571429e-07,
+      "loss": 0.8886,
+      "step": 3825
+    },
+    {
+      "epoch": 93.91358024691358,
+      "grad_norm": 3.687716484069824,
+      "learning_rate": 4.3142857142857146e-07,
+      "loss": 0.8911,
+      "step": 3850
+    },
+    {
+      "epoch": 94.51851851851852,
+      "grad_norm": 4.194035530090332,
+      "learning_rate": 3.6e-07,
+      "loss": 0.889,
+      "step": 3875
+    },
+    {
+      "epoch": 95.12345679012346,
+      "grad_norm": 4.321438312530518,
+      "learning_rate": 2.885714285714286e-07,
+      "loss": 0.8926,
+      "step": 3900
+    },
+    {
+      "epoch": 95.74074074074075,
+      "grad_norm": 4.36216926574707,
+      "learning_rate": 2.1714285714285715e-07,
+      "loss": 0.8923,
+      "step": 3925
+    },
+    {
+      "epoch": 96.34567901234568,
+      "grad_norm": 3.93856143951416,
+      "learning_rate": 1.4571428571428574e-07,
+      "loss": 0.8849,
+      "step": 3950
+    },
+    {
+      "epoch": 96.96296296296296,
+      "grad_norm": 2.968627691268921,
+      "learning_rate": 7.428571428571429e-08,
+      "loss": 0.8904,
+      "step": 3975
+    },
+    {
+      "epoch": 97.5679012345679,
+      "grad_norm": 16.590002059936523,
+      "learning_rate": 2.8571428571428576e-09,
+      "loss": 0.8853,
+      "step": 4000
+    },
+    {
+      "epoch": 97.5679012345679,
+      "eval_loss": 0.8817942142486572,
+      "eval_runtime": 2.1317,
+      "eval_samples_per_second": 67.551,
+      "eval_steps_per_second": 33.775,
+      "step": 4000
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 4000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 98,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3285475532317440.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-4000/training_args.bin b/checkpoint-4000/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..cf416a3be0d19e0ce5aadbb31f093c5d913fee53
--- /dev/null
+++ b/checkpoint-4000/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66d367dc04409d63341644c780ebdb997e8756f9aa9f6d110afc5d9ab8de84be
+size 5905
diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..1197099a882c66f2bbab2210646af21b05d05ee4
--- /dev/null
+++ b/config.json
@@ -0,0 +1,91 @@
+{
+  "activation_dropout": 0.1,
+  "apply_spec_augment": true,
+  "architectures": [
+    "SpeechT5ForTextToSpeech"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 0,
+  "conv_bias": false,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "decoder_attention_heads": 12,
+  "decoder_ffn_dim": 3072,
+  "decoder_layerdrop": 0.1,
+  "decoder_layers": 6,
+  "decoder_start_token_id": 2,
+  "encoder_attention_heads": 12,
+  "encoder_ffn_dim": 3072,
+  "encoder_layerdrop": 0.1,
+  "encoder_layers": 12,
+  "encoder_max_relative_position": 160,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_norm": "group",
+  "feat_proj_dropout": 0.0,
+  "guided_attention_loss_num_heads": 2,
+  "guided_attention_loss_scale": 10.0,
+  "guided_attention_loss_sigma": 0.4,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "is_encoder_decoder": true,
+  "layer_norm_eps": 1e-05,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "max_length": null,
+  "max_speech_positions": 1876,
+  "max_text_positions": 600,
+  "model_type": "speecht5",
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_mel_bins": 80,
+  "pad_token_id": 1,
+  "positional_dropout": 0.1,
+  "reduction_factor": 2,
+  "scale_embedding": false,
+  "speaker_embedding_dim": 512,
+  "speech_decoder_postnet_dropout": 0.5,
+  "speech_decoder_postnet_kernel": 5,
+  "speech_decoder_postnet_layers": 5,
+  "speech_decoder_postnet_units": 256,
+  "speech_decoder_prenet_dropout": 0.5,
+  "speech_decoder_prenet_layers": 2,
+  "speech_decoder_prenet_units": 256,
+  "torch_dtype": "float32",
+  "transformers_version": "4.55.4",
+  "use_cache": false,
+  "use_guided_attention_loss": true,
+  "vocab_size": 81
+}
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..1748094b7e4b8c1fb66bf2357c44c5f625a36179
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,9 @@
+{
+  "_from_model_config": true,
+  "bos_token_id": 0,
+  "decoder_start_token_id": 2,
+  "eos_token_id": 2,
+  "max_length": 1876,
+  "pad_token_id": 1,
+  "transformers_version": "4.55.4"
+}
diff --git a/logs/events.out.tfevents.1756106108.MSI.40384.0 b/logs/events.out.tfevents.1756106108.MSI.40384.0
new file mode 100644
index 0000000000000000000000000000000000000000..05eb1425642137076330ffafc058c20c6583c5e0
--- /dev/null
+++ b/logs/events.out.tfevents.1756106108.MSI.40384.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:acb5c639753cb4d122ae95afcb18ae1214d10c2d954d97a6d44274330c775c93
+size 41712
diff --git a/model.safetensors b/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..063a7debbe0b96bed7d11b8b1e6151197d55864b
--- /dev/null
+++ b/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b10dd87b217ab2fc492088d02d67c7955fbbff9f22b6fda9133dfa1744e6d9d
+size 577789320
diff --git a/preprocessor_config.json b/preprocessor_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..98a41e66e16f3225a15cb2b51c963fe2213aa273
--- /dev/null
+++ b/preprocessor_config.json
@@ -0,0 +1,19 @@
+{
+  "do_normalize": false,
+  "feature_extractor_type": "SpeechT5FeatureExtractor",
+  "feature_size": 1,
+  "fmax": 7600,
+  "fmin": 80,
+  "frame_signal_scale": 1.0,
+  "hop_length": 16,
+  "mel_floor": 1e-10,
+  "num_mel_bins": 80,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "processor_class": "SpeechT5Processor",
+  "reduction_factor": 2,
+  "return_attention_mask": true,
+  "sampling_rate": 16000,
+  "win_function": "hann_window",
+  "win_length": 64
+}
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..4ee24ec69861cfc94abbe2c8c934aa0744aa623c
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,13 @@
+{
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}
diff --git a/spkrec_cache/classifier.ckpt b/spkrec_cache/classifier.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..d6f70f222826ea5490bfad657373af3d6d2a08ba
--- /dev/null
+++ b/spkrec_cache/classifier.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd9e3634fe68bd0a427c95e354c0c677374f62b3f434e45b78599950d860d535
+size 5534328
diff --git a/spkrec_cache/embedding_model.ckpt b/spkrec_cache/embedding_model.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..8ba959669e5570dbbf8076bc6b8a79555a81d5c4
--- /dev/null
+++ b/spkrec_cache/embedding_model.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0575cb64845e6b9a10db9bcb74d5ac32b326b8dc90352671d345e2ee3d0126a2
+size 83316686
diff --git a/spkrec_cache/hyperparams.yaml b/spkrec_cache/hyperparams.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..70e4cd0beb74ca08a2df9de6bd79d938670a4d15
--- /dev/null
+++ b/spkrec_cache/hyperparams.yaml
@@ -0,0 +1,58 @@
+# ############################################################################
+# Model: ECAPA big for Speaker verification
+# ############################################################################
+
+# Feature parameters
+n_mels: 80
+
+# Pretrain folder (HuggingFace)
+pretrained_path: speechbrain/spkrec-ecapa-voxceleb
+
+# Output parameters
+out_n_neurons: 7205
+
+# Model params
+compute_features: !new:speechbrain.lobes.features.Fbank
+    n_mels: !ref <n_mels>
+
+mean_var_norm: !new:speechbrain.processing.features.InputNormalization
+    norm_type: sentence
+    std_norm: False
+
+embedding_model: !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
+    input_size: !ref <n_mels>
+    channels: [1024, 1024, 1024, 1024, 3072]
+    kernel_sizes: [5, 3, 3, 3, 1]
+    dilations: [1, 2, 3, 4, 1]
+    attention_channels: 128
+    lin_neurons: 192
+
+classifier: !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
+    input_size: 192
+    out_neurons: !ref <out_n_neurons>
+
+mean_var_norm_emb: !new:speechbrain.processing.features.InputNormalization
+    norm_type: global
+    std_norm: False
+
+modules:
+    compute_features: !ref <compute_features>
+    mean_var_norm: !ref <mean_var_norm>
+    embedding_model: !ref <embedding_model>
+    mean_var_norm_emb: !ref <mean_var_norm_emb>
+    classifier: !ref <classifier>
+        
+label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
+
+        
+pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
+    loadables:
+        embedding_model: !ref <embedding_model>
+        mean_var_norm_emb: !ref <mean_var_norm_emb>
+        classifier: !ref <classifier>
+        label_encoder: !ref <label_encoder>
+    paths:
+        embedding_model: !ref <pretrained_path>/embedding_model.ckpt
+        mean_var_norm_emb: !ref <pretrained_path>/mean_var_norm_emb.ckpt
+        classifier: !ref <pretrained_path>/classifier.ckpt
+        label_encoder: !ref <pretrained_path>/label_encoder.txt
diff --git a/spkrec_cache/label_encoder.ckpt b/spkrec_cache/label_encoder.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..1b4ba4327aeb75727f85395533bc448740cb1d1d
--- /dev/null
+++ b/spkrec_cache/label_encoder.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e13c3a167bb4112685670ee896d20e2b565af16b3a4ceeaa8689fa4d22adb8b9
+size 128619
diff --git a/spkrec_cache/mean_var_norm_emb.ckpt b/spkrec_cache/mean_var_norm_emb.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..a7fb23a2c35884d02fbf2803755228109852ba43
--- /dev/null
+++ b/spkrec_cache/mean_var_norm_emb.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd70225b05b37be64fc5a95e24395d804231d43f74b2e1e5a513db7b69b34c33
+size 1921
diff --git a/spm_char.model b/spm_char.model
new file mode 100644
index 0000000000000000000000000000000000000000..8fb73691942626fa75df80b61aab0e9b9340d8e2
--- /dev/null
+++ b/spm_char.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7fcc48f3e225f627b1641db410ceb0c8649bd2b0c982e150b03f8be3728ab560
+size 238473
diff --git a/style_adaptor.pt b/style_adaptor.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3567c7bc822095537006823f57dd4d90a068077e
--- /dev/null
+++ b/style_adaptor.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65b08618225d68d0398c44b020f795f8ba9c763c67cde2c5834c0dcb5ff44019
+size 2118433
diff --git a/style_fusion.pt b/style_fusion.pt
new file mode 100644
index 0000000000000000000000000000000000000000..543f13b773613cd52dbc387660b28fd9ad1d6906
--- /dev/null
+++ b/style_fusion.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:891f2873fa8f0d20580c329a718e9b3674489cfd035d93cf9fe753de4d359b65
+size 1972815
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..e88d44ba3be31ac8f53461ae7c1b02b4c5c830ab
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,64 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "79": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "80": {
+      "content": "<ctc_blank>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "mask_token": "<mask>",
+  "model_max_length": 600,
+  "normalize": false,
+  "pad_token": "<pad>",
+  "processor_class": "SpeechT5Processor",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "SpeechT5Tokenizer",
+  "unk_token": "<unk>"
+}