diff --git a/Hindustani-20M-CNN/best_adapters.pt b/Hindustani-20M-CNN/best_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..dde4aa25eac7fb07ce352da50261f38b09a90e21 --- /dev/null +++ b/Hindustani-20M-CNN/best_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:563543e44916a0bccb44d2e17c392f44644badaca93842cd35bfe813fc491330 +size 89348293 diff --git a/Hindustani-20M-CNN/epoch_10_adapters.pt b/Hindustani-20M-CNN/epoch_10_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c285916abd4974cd86f9fb0ee7d9b425f1914df --- /dev/null +++ b/Hindustani-20M-CNN/epoch_10_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8aad4e02e4c491f85ec60ac0f76aee9e9462ddf340cecf3d1fbd63e68f309de +size 89348805 diff --git a/Hindustani-20M-CNN/summary.jsonl b/Hindustani-20M-CNN/summary.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..829f082c9bf40df2e39859084378420821e621a6 --- /dev/null +++ b/Hindustani-20M-CNN/summary.jsonl @@ -0,0 +1,26 @@ +{"train_file": "data/metadata_train_hindustani.json", "validation_file": "data/metadata_test_hindustani.json", "validation_file2": "data/metadata_test_hindustani.json", "test_file": "data/metadata_test_hindustani.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": true, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 4, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1743147931", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"} + +{"epoch": [1], "step": 1565, "train_loss": 0.7346, "val_loss": 0.6447, "val_loss2": 0.6446, "_timestamp": 1743165321, "_runtime": 17389} + +{"epoch": [2], "step": 3130, "train_loss": 0.7279, "val_loss": 0.6418, "val_loss2": 0.6419, "_timestamp": 1743189912, "_runtime": 41980} + +{"epoch": [3], "step": 4695, "train_loss": 0.7247, "val_loss": 0.6399, "val_loss2": 0.6399, "_timestamp": 1743214898, "_runtime": 66966} + +{"epoch": [4], "step": 6260, "train_loss": 0.7226, "val_loss": 0.6386, "val_loss2": 0.6386, "_timestamp": 1743239210, "_runtime": 91278} + +{"epoch": [5], "step": 7825, "train_loss": 0.7228, "val_loss": 0.6375, "val_loss2": 0.6375, "_timestamp": 1743263496, "_runtime": 115564} + +{"epoch": [6], "step": 9390, "train_loss": 0.7202, "val_loss": 0.6366, "val_loss2": 0.6365, "_timestamp": 1743287951, "_runtime": 140019} + +{"epoch": [7], "step": 10955, "train_loss": 0.7206, "val_loss": 0.636, "val_loss2": 0.6361, "_timestamp": 1743312250, "_runtime": 164318} + +{"epoch": [8], "step": 12520, "train_loss": 0.7201, "val_loss": 0.6356, "val_loss2": 0.6354, "_timestamp": 1743330721, "_runtime": 182789} + +{"epoch": [9], "step": 14085, "train_loss": 0.7188, "val_loss": 0.6356, "val_loss2": 0.6355, "_timestamp": 1743354070, "_runtime": 206138} + +{"epoch": [10], "step": 15650, "train_loss": 0.7185, "val_loss": 0.6344, "val_loss2": 0.6344, "_timestamp": 1743377342, "_runtime": 229410} + +{"epoch": [11], "step": 17215, "train_loss": 0.7177, "val_loss": 0.6343, "val_loss2": 0.6343, "_timestamp": 1743400710, "_runtime": 252778} + +{"epoch": [12], "step": 18780, "train_loss": 0.7183, "val_loss": 0.6341, "val_loss2": 0.6342, "_timestamp": 1743424752, "_runtime": 276820} + diff --git a/Hindustani-20M-Transformer/best_adapters.pt b/Hindustani-20M-Transformer/best_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..edb41ff98f80da783c4656effcbff8c462a71314 --- /dev/null +++ b/Hindustani-20M-Transformer/best_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0065801ff7359194f5193ab694dd7751549f0e70af56d279989bfe2874fe2e74 +size 73130181 diff --git a/Hindustani-20M-Transformer/epoch_10_adapters.pt b/Hindustani-20M-Transformer/epoch_10_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..48b19beb29b80d477d97a88044ac85e6d4eb91fa --- /dev/null +++ b/Hindustani-20M-Transformer/epoch_10_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a66ff9a7306227b3cfd8c85c1a3d2bff3e566676a97a548cb40d2f8b053faaf +size 73131589 diff --git a/Hindustani-20M-Transformer/summary.jsonl b/Hindustani-20M-Transformer/summary.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..70e647569b05fea1457aa2702b9f52ce7ed908ee --- /dev/null +++ b/Hindustani-20M-Transformer/summary.jsonl @@ -0,0 +1,26 @@ +{"train_file": "data/metadata_train_hindustani.json", "validation_file": "data/metadata_test_hindustani.json", "validation_file2": "data/metadata_test_hindustani.json", "test_file": "data/metadata_test_hindustani.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": true, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 4, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1742715616", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"} + +{"epoch": [1], "step": 1565, "train_loss": 0.7348, "val_loss": 0.6459, "val_loss2": 0.6457, "_timestamp": 1742729556, "_runtime": 13939} + +{"epoch": [2], "step": 3130, "train_loss": 0.7278, "val_loss": 0.6423, "val_loss2": 0.6425, "_timestamp": 1742750773, "_runtime": 35156} + +{"epoch": [3], "step": 4695, "train_loss": 0.7251, "val_loss": 0.6406, "val_loss2": 0.6408, "_timestamp": 1742771679, "_runtime": 56062} + +{"epoch": [4], "step": 6260, "train_loss": 0.7231, "val_loss": 0.6398, "val_loss2": 0.6397, "_timestamp": 1742793020, "_runtime": 77403} + +{"epoch": [5], "step": 7825, "train_loss": 0.7227, "val_loss": 0.6387, "val_loss2": 0.6388, "_timestamp": 1742813435, "_runtime": 97818} + +{"epoch": [6], "step": 9390, "train_loss": 0.7212, "val_loss": 0.6375, "val_loss2": 0.6376, "_timestamp": 1742834179, "_runtime": 118562} + +{"epoch": [7], "step": 10955, "train_loss": 0.721, "val_loss": 0.6372, "val_loss2": 0.6371, "_timestamp": 1742854218, "_runtime": 138601} + +{"epoch": [8], "step": 12520, "train_loss": 0.7206, "val_loss": 0.6364, "val_loss2": 0.6366, "_timestamp": 1742874289, "_runtime": 158672} + +{"epoch": [9], "step": 14085, "train_loss": 0.7198, "val_loss": 0.6363, "val_loss2": 0.6362, "_timestamp": 1742894221, "_runtime": 178604} + +{"epoch": [10], "step": 15650, "train_loss": 0.7191, "val_loss": 0.6355, "val_loss2": 0.6358, "_timestamp": 1742916916, "_runtime": 201299} + +{"epoch": [11], "step": 17215, "train_loss": 0.7185, "val_loss": 0.6358, "val_loss2": 0.6354, "_timestamp": 1742937243, "_runtime": 221626} + +{"epoch": [12], "step": 18780, "train_loss": 0.7195, "val_loss": 0.6352, "val_loss2": 0.6356, "_timestamp": 1742957525, "_runtime": 241908} + diff --git a/Hindustani-2M-CNN/best_adapters.pt b/Hindustani-2M-CNN/best_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..92e5703a81c7e5c8218c8e19d9d4bd784627c44f --- /dev/null +++ b/Hindustani-2M-CNN/best_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38ad57b0bd253112940c9f27b7c4820db0da6f222069d48a93ff0ad68da2598e +size 6025775 diff --git a/Hindustani-2M-CNN/epoch_10_adapters.pt b/Hindustani-2M-CNN/epoch_10_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..bd23dac3d5cd05693fb4993b6ee029d6859d9d00 --- /dev/null +++ b/Hindustani-2M-CNN/epoch_10_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb0ce1f62b84d90dd02f4159fddf34609ded39217629805c0298d2d198204ca9 +size 6025951 diff --git a/Hindustani-2M-CNN/summary.jsonl b/Hindustani-2M-CNN/summary.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f9b2e1eecc4cf9b12e854e741661d2be09985331 --- /dev/null +++ b/Hindustani-2M-CNN/summary.jsonl @@ -0,0 +1,24 @@ +{"train_file": "data/metadata_train_hindustani.json", "validation_file": "data/metadata_test_hindustani.json", "validation_file2": "data/metadata_test_hindustani.json", "test_file": "data/metadata_test_hindustani.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": true, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 4, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1743875307", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"} + +{"epoch": [1], "step": 1565, "train_loss": 0.7403, "val_loss": 0.6505, "val_loss2": 0.6504, "_timestamp": 1743885783, "_runtime": 10474} + +{"epoch": [2], "step": 3130, "train_loss": 0.7321, "val_loss": 0.6477, "val_loss2": 0.6479, "_timestamp": 1743902105, "_runtime": 26796} + +{"epoch": [3], "step": 4695, "train_loss": 0.7297, "val_loss": 0.6466, "val_loss2": 0.6466, "_timestamp": 1743918387, "_runtime": 43078} + +{"epoch": [4], "step": 6260, "train_loss": 0.728, "val_loss": 0.6458, "val_loss2": 0.6456, "_timestamp": 1743934107, "_runtime": 58798} + +{"epoch": [5], "step": 7825, "train_loss": 0.7286, "val_loss": 0.6449, "val_loss2": 0.6449, "_timestamp": 1743950645, "_runtime": 75336} + +{"epoch": [6], "step": 9390, "train_loss": 0.7271, "val_loss": 0.6448, "val_loss2": 0.6445, "_timestamp": 1743967320, "_runtime": 92011} + +{"epoch": [7], "step": 10955, "train_loss": 0.7268, "val_loss": 0.6439, "val_loss2": 0.6437, "_timestamp": 1743983597, "_runtime": 108288} + +{"epoch": [8], "step": 12520, "train_loss": 0.7275, "val_loss": 0.6436, "val_loss2": 0.6436, "_timestamp": 1743999422, "_runtime": 124113} + +{"epoch": [9], "step": 14085, "train_loss": 0.7266, "val_loss": 0.6433, "val_loss2": 0.6432, "_timestamp": 1744015961, "_runtime": 140652} + +{"epoch": [10], "step": 15650, "train_loss": 0.726, "val_loss": 0.6428, "val_loss2": 0.6429, "_timestamp": 1744031700, "_runtime": 156391} + +{"epoch": [11], "step": 17215, "train_loss": 0.7253, "val_loss": 0.6429, "val_loss2": 0.6428, "_timestamp": 1744048427, "_runtime": 173118} + diff --git a/Hindustani-2M-Transformer/best_adapters.pt b/Hindustani-2M-Transformer/best_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..54d2af33a8d180afb8fa08939cbeed9d6e2aeef6 --- /dev/null +++ b/Hindustani-2M-Transformer/best_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be3afda934d4e45fcd20a41807c3ff1279187611dccb3b65a62ae944f1cb5d55 +size 12808073 diff --git a/Hindustani-2M-Transformer/epoch_10_adapters.pt b/Hindustani-2M-Transformer/epoch_10_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..c68c2b0257d7cb5d56ad1c3be7612666c040f04b --- /dev/null +++ b/Hindustani-2M-Transformer/epoch_10_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:773ca7d871ab0a89236a2b40ad9ab2acd2a6a40c9c682642df201d8a59674015 +size 12808781 diff --git a/Hindustani-2M-Transformer/summary.jsonl b/Hindustani-2M-Transformer/summary.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..44acf6577c48d96cc3ec97abebf6346ebfa2e919 --- /dev/null +++ b/Hindustani-2M-Transformer/summary.jsonl @@ -0,0 +1,28 @@ +{"train_file": "data/metadata_train_hindustani.json", "validation_file": "data/metadata_test_hindustani.json", "validation_file2": "data/metadata_test_hindustani.json", "test_file": "data/metadata_test_hindustani.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": true, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 4, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1744056337", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"} + +{"epoch": [1], "step": 1565, "train_loss": 0.7393, "val_loss": 0.6498, "val_loss2": 0.6501, "_timestamp": 1744068610, "_runtime": 12272} + +{"epoch": [2], "step": 3130, "train_loss": 0.7313, "val_loss": 0.6473, "val_loss2": 0.6476, "_timestamp": 1744087699, "_runtime": 31361} + +{"epoch": [3], "step": 4695, "train_loss": 0.729, "val_loss": 0.646, "val_loss2": 0.6461, "_timestamp": 1744105752, "_runtime": 49414} + +{"epoch": [4], "step": 6260, "train_loss": 0.7282, "val_loss": 0.6452, "val_loss2": 0.6453, "_timestamp": 1744124547, "_runtime": 68209} + +{"epoch": [5], "step": 7825, "train_loss": 0.7278, "val_loss": 0.6442, "val_loss2": 0.6441, "_timestamp": 1744142368, "_runtime": 86030} + +{"epoch": [6], "step": 9390, "train_loss": 0.7263, "val_loss": 0.6437, "val_loss2": 0.6439, "_timestamp": 1744160135, "_runtime": 103797} + +{"epoch": [7], "step": 10955, "train_loss": 0.7267, "val_loss": 0.6436, "val_loss2": 0.6429, "_timestamp": 1744177825, "_runtime": 121487} + +{"epoch": [8], "step": 12520, "train_loss": 0.7264, "val_loss": 0.643, "val_loss2": 0.6432, "_timestamp": 1744195561, "_runtime": 139223} + +{"epoch": [9], "step": 14085, "train_loss": 0.7259, "val_loss": 0.6427, "val_loss2": 0.6428, "_timestamp": 1744213259, "_runtime": 156921} + +{"epoch": [10], "step": 15650, "train_loss": 0.7246, "val_loss": 0.6425, "val_loss2": 0.6423, "_timestamp": 1744232117, "_runtime": 175779} + +{"epoch": [11], "step": 17215, "train_loss": 0.7252, "val_loss": 0.6421, "val_loss2": 0.642, "_timestamp": 1744249856, "_runtime": 193518} + +{"epoch": [12], "step": 18780, "train_loss": 0.7256, "val_loss": 0.6415, "val_loss2": 0.6417, "_timestamp": 1744269115, "_runtime": 212777} + +{"epoch": [13], "step": 20345, "train_loss": 0.7239, "val_loss": 0.6421, "val_loss2": 0.6417, "_timestamp": 1744288494, "_runtime": 232156} + diff --git a/Hindustani-40M-CNN/best_adapters.pt b/Hindustani-40M-CNN/best_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..54d39e511234470775f2088914d03284ea91dd89 --- /dev/null +++ b/Hindustani-40M-CNN/best_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:553234f8dbd18d880aa0f9bb18323e13b28b7f1d02e467d4512215442833ab7e +size 178445189 diff --git a/Hindustani-40M-CNN/epoch_10_adapters.pt b/Hindustani-40M-CNN/epoch_10_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..d2b2882c7ef16eb2174829b53ba9cf022069e536 --- /dev/null +++ b/Hindustani-40M-CNN/epoch_10_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bd6df5a64da5e77a2f5fe6e63399344489d1ce28c6c304a0696bb1678ba6ed8 +size 178445701 diff --git a/Hindustani-40M-CNN/summary.jsonl b/Hindustani-40M-CNN/summary.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f62739c0b390f8666b0167a7f33f8898f928ea32 --- /dev/null +++ b/Hindustani-40M-CNN/summary.jsonl @@ -0,0 +1,26 @@ +{"train_file": "data/metadata_train_hindustani.json", "validation_file": "data/metadata_test_hindustani.json", "validation_file2": "data/metadata_test_hindustani.json", "test_file": "data/metadata_test_hindustani.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": true, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 4, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1743429017", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"} + +{"epoch": [1], "step": 1565, "train_loss": 0.7329, "val_loss": 0.6431, "val_loss2": 0.6431, "_timestamp": 1743446526, "_runtime": 17508} + +{"epoch": [2], "step": 3130, "train_loss": 0.726, "val_loss": 0.6397, "val_loss2": 0.6397, "_timestamp": 1743471378, "_runtime": 42360} + +{"epoch": [3], "step": 4695, "train_loss": 0.7229, "val_loss": 0.6378, "val_loss2": 0.6379, "_timestamp": 1743496402, "_runtime": 67384} + +{"epoch": [4], "step": 6260, "train_loss": 0.721, "val_loss": 0.6366, "val_loss2": 0.6365, "_timestamp": 1743521515, "_runtime": 92497} + +{"epoch": [5], "step": 7825, "train_loss": 0.72, "val_loss": 0.6353, "val_loss2": 0.6358, "_timestamp": 1743540759, "_runtime": 111741} + +{"epoch": [6], "step": 9390, "train_loss": 0.7181, "val_loss": 0.6341, "val_loss2": 0.6341, "_timestamp": 1743557812, "_runtime": 128794} + +{"epoch": [7], "step": 10955, "train_loss": 0.7173, "val_loss": 0.6341, "val_loss2": 0.6341, "_timestamp": 1743574819, "_runtime": 145801} + +{"epoch": [8], "step": 12520, "train_loss": 0.7182, "val_loss": 0.6329, "val_loss2": 0.6332, "_timestamp": 1743591569, "_runtime": 162551} + +{"epoch": [9], "step": 14085, "train_loss": 0.7174, "val_loss": 0.6329, "val_loss2": 0.6328, "_timestamp": 1743608080, "_runtime": 179062} + +{"epoch": [10], "step": 15650, "train_loss": 0.716, "val_loss": 0.6321, "val_loss2": 0.6319, "_timestamp": 1743625120, "_runtime": 196102} + +{"epoch": [11], "step": 17215, "train_loss": 0.7151, "val_loss": 0.6322, "val_loss2": 0.6323, "_timestamp": 1743642231, "_runtime": 213213} + +{"epoch": [12], "step": 18780, "train_loss": 0.7166, "val_loss": 0.631, "val_loss2": 0.6313, "_timestamp": 1743658843, "_runtime": 229825} + diff --git a/Hindustani-40M-Transformer/best_adapters.pt b/Hindustani-40M-Transformer/best_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..266feb1337693d518d762405c4eb98fdf2b2541c --- /dev/null +++ b/Hindustani-40M-Transformer/best_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18a7cdb4f49bf18aa6a7f0ff1c99f4c18131fa7f72feed821fc916ea690a5a43 +size 146260733 diff --git a/Hindustani-40M-Transformer/epoch_10_adapters.pt b/Hindustani-40M-Transformer/epoch_10_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..e10ace0deb96440f38396ddc3001db7dcd08ea89 --- /dev/null +++ b/Hindustani-40M-Transformer/epoch_10_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f5b4c16508d686118fd28f96fa8df0250ebc4cc10603473c7c22507199a58d8 +size 146263541 diff --git a/Hindustani-40M-Transformer/summary.jsonl b/Hindustani-40M-Transformer/summary.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..875106c6bb9225c43af89fcb9beb215733c01124 --- /dev/null +++ b/Hindustani-40M-Transformer/summary.jsonl @@ -0,0 +1,24 @@ +{"train_file": "data/metadata_train_hindustani.json", "validation_file": "data/metadata_test_hindustani.json", "validation_file2": "data/metadata_test_hindustani.json", "test_file": "data/metadata_test_hindustani.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": true, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 4, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1742190748", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"} + +{"epoch": [1], "step": 1565, "train_loss": 0.7423, "val_loss": 0.649, "val_loss2": 0.6486, "_timestamp": 1742208013, "_runtime": 17264} + +{"epoch": [2], "step": 3130, "train_loss": 0.7298, "val_loss": 0.6445, "val_loss2": 0.6449, "_timestamp": 1742232058, "_runtime": 41309} + +{"epoch": [3], "step": 4695, "train_loss": 0.7266, "val_loss": 0.6424, "val_loss2": 0.6425, "_timestamp": 1742256099, "_runtime": 65350} + +{"epoch": [4], "step": 6260, "train_loss": 0.7238, "val_loss": 0.6414, "val_loss2": 0.6413, "_timestamp": 1742280163, "_runtime": 89414} + +{"epoch": [5], "step": 7825, "train_loss": 0.7235, "val_loss": 0.6395, "val_loss2": 0.6395, "_timestamp": 1742304443, "_runtime": 113694} + +{"epoch": [6], "step": 9390, "train_loss": 0.7218, "val_loss": 0.6383, "val_loss2": 0.6387, "_timestamp": 1742328903, "_runtime": 138154} + +{"epoch": [7], "step": 10955, "train_loss": 0.7213, "val_loss": 0.638, "val_loss2": 0.6379, "_timestamp": 1742353412, "_runtime": 162663} + +{"epoch": [8], "step": 12520, "train_loss": 0.7215, "val_loss": 0.6373, "val_loss2": 0.6374, "_timestamp": 1742377946, "_runtime": 187197} + +{"epoch": [9], "step": 14085, "train_loss": 0.7212, "val_loss": 0.6371, "val_loss2": 0.6369, "_timestamp": 1742402470, "_runtime": 211721} + +{"epoch": [10], "step": 15650, "train_loss": 0.7194, "val_loss": 0.6362, "val_loss2": 0.6362, "_timestamp": 1742426977, "_runtime": 236228} + +{"epoch": [11], "step": 17215, "train_loss": 0.7189, "val_loss": 0.6361, "val_loss2": 0.6358, "_timestamp": 1742451410, "_runtime": 260661} + diff --git a/Hindustani-70M-CNN/best_adapters.pt b/Hindustani-70M-CNN/best_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..f5f4bffddffde1993fe514c32366f29fa846aa62 --- /dev/null +++ b/Hindustani-70M-CNN/best_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05cee93fcd9d1d35c353446eabf2d1d32fcfb7e15baf7b52d5440433f04fe66b +size 237758893 diff --git a/Hindustani-70M-CNN/epoch_10_adapters.pt b/Hindustani-70M-CNN/epoch_10_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..71cb1646d5c3bde7fdec6947676246ed9fb832ee --- /dev/null +++ b/Hindustani-70M-CNN/epoch_10_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd3a95d457afdd79112308f02d59e8510379b8875b08a1470d2c93c0ca66a70a +size 237759237 diff --git a/Hindustani-70M-CNN/summary.jsonl b/Hindustani-70M-CNN/summary.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e4c2f75fcc2260edf67de8a0416751c3739a9720 --- /dev/null +++ b/Hindustani-70M-CNN/summary.jsonl @@ -0,0 +1,26 @@ +{"train_file": "data/metadata_train_hindustani.json", "validation_file": "data/metadata_test_hindustani.json", "validation_file2": "data/metadata_test_hindustani.json", "test_file": "data/metadata_test_hindustani.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": true, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 4, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1743671054", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"} + +{"epoch": [1], "step": 1565, "train_loss": 0.7311, "val_loss": 0.6419, "val_loss2": 0.6417, "_timestamp": 1743682272, "_runtime": 11217} + +{"epoch": [2], "step": 3130, "train_loss": 0.7245, "val_loss": 0.6383, "val_loss2": 0.6388, "_timestamp": 1743699666, "_runtime": 28611} + +{"epoch": [3], "step": 4695, "train_loss": 0.722, "val_loss": 0.6371, "val_loss2": 0.6371, "_timestamp": 1743716290, "_runtime": 45235} + +{"epoch": [4], "step": 6260, "train_loss": 0.7198, "val_loss": 0.6357, "val_loss2": 0.6353, "_timestamp": 1743732866, "_runtime": 61811} + +{"epoch": [5], "step": 7825, "train_loss": 0.719, "val_loss": 0.6343, "val_loss2": 0.6342, "_timestamp": 1743749482, "_runtime": 78427} + +{"epoch": [6], "step": 9390, "train_loss": 0.7168, "val_loss": 0.6329, "val_loss2": 0.633, "_timestamp": 1743767092, "_runtime": 96037} + +{"epoch": [7], "step": 10955, "train_loss": 0.7165, "val_loss": 0.6326, "val_loss2": 0.6325, "_timestamp": 1743784991, "_runtime": 113936} + +{"epoch": [8], "step": 12520, "train_loss": 0.7163, "val_loss": 0.6317, "val_loss2": 0.6319, "_timestamp": 1743803174, "_runtime": 132119} + +{"epoch": [9], "step": 14085, "train_loss": 0.7153, "val_loss": 0.6319, "val_loss2": 0.6317, "_timestamp": 1743820863, "_runtime": 149808} + +{"epoch": [10], "step": 15650, "train_loss": 0.7144, "val_loss": 0.631, "val_loss2": 0.6311, "_timestamp": 1743838467, "_runtime": 167412} + +{"epoch": [11], "step": 17215, "train_loss": 0.7139, "val_loss": 0.6307, "val_loss2": 0.6303, "_timestamp": 1743856327, "_runtime": 185272} + +{"epoch": [12], "step": 18780, "train_loss": 0.7144, "val_loss": 0.6299, "val_loss2": 0.6301, "_timestamp": 1743874113, "_runtime": 203058} + diff --git a/Hindustani-70M-Transformer/best_adapters.pt b/Hindustani-70M-Transformer/best_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..204f8564e24a9697c15f2c50141f40ca12ec72e9 --- /dev/null +++ b/Hindustani-70M-Transformer/best_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a32af19c15598b6447aff8c61befd6c4c6d86dcabd016e0202e71161caf76e47 +size 225705984 diff --git a/Hindustani-70M-Transformer/epoch_10_adapters.pt b/Hindustani-70M-Transformer/epoch_10_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ffe05901e28fe46277be7baec4beffa93295816 --- /dev/null +++ b/Hindustani-70M-Transformer/epoch_10_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:315b320c0118a5f3c1ac596cefc50c9e197f6eade205dc499f71fd3c7d1556fc +size 224657408 diff --git a/Hindustani-70M-Transformer/summary.jsonl b/Hindustani-70M-Transformer/summary.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5fbe7136960cc8d825375bf9975d4a1bb8beea2e --- /dev/null +++ b/Hindustani-70M-Transformer/summary.jsonl @@ -0,0 +1,26 @@ +{"train_file": "data/metadata_train_hindustani.json", "validation_file": "data/metadata_test_hindustani.json", "validation_file2": "data/metadata_test_hindustani.json", "test_file": "data/metadata_test_hindustani.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": true, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 4, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1741776778", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"} + +{"epoch": [1], "step": 1565, "train_loss": 0.7426, "val_loss": 0.6411, "val_loss2": 0.641} + +{"epoch": [2], "step": 3130, "train_loss": 0.7254, "val_loss": 0.6354, "val_loss2": 0.6355} + +{"epoch": [3], "step": 4695, "train_loss": 0.7216, "val_loss": 0.6326, "val_loss2": 0.6326} + +{"epoch": [4], "step": 6260, "train_loss": 0.7189, "val_loss": 0.6312, "val_loss2": 0.631} + +{"epoch": [5], "step": 7825, "train_loss": 0.7186, "val_loss": 0.6297, "val_loss2": 0.6298} + +{"epoch": [6], "step": 9390, "train_loss": 0.7166, "val_loss": 0.6285, "val_loss2": 0.6287} + +{"epoch": [7], "step": 10955, "train_loss": 0.7162, "val_loss": 0.6279, "val_loss2": 0.6279} + +{"epoch": [8], "step": 12520, "train_loss": 0.716, "val_loss": 0.6273, "val_loss2": 0.6273} + +{"epoch": [9], "step": 14085, "train_loss": 0.7148, "val_loss": 0.6273, "val_loss2": 0.6272} + +{"epoch": [10], "step": 15650, "train_loss": 0.7137, "val_loss": 0.6262, "val_loss2": 0.6262} + +{"epoch": [11], "step": 17215, "train_loss": 0.714, "val_loss": 0.6258, "val_loss2": 0.6257} + +{"epoch": [12], "step": 18780, "train_loss": 0.7138, "val_loss": 0.6252, "val_loss2": 0.6254} + diff --git a/Hindustani-8M-CNN/best_adapters.pt b/Hindustani-8M-CNN/best_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..ad0de9744a0a7d76a0cc93a35f693929c291f5ef --- /dev/null +++ b/Hindustani-8M-CNN/best_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2edffcfa5c98d06e1e12952344db9a5390dd7b4e172e50b7dc42fae819c0ded5 +size 30130069 diff --git a/Hindustani-8M-CNN/epoch_10_adapters.pt b/Hindustani-8M-CNN/epoch_10_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b842af89f4e5f4abf892f6e5a362fb0a6849e8c --- /dev/null +++ b/Hindustani-8M-CNN/epoch_10_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1328aa3d5ae31ff71b1b730d1abd991a89539bec70979ac7184309d270906455 +size 30130917 diff --git a/Hindustani-8M-CNN/summary.jsonl b/Hindustani-8M-CNN/summary.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..cd45db668ec472ebc16579154f81ad051cac01bb --- /dev/null +++ b/Hindustani-8M-CNN/summary.jsonl @@ -0,0 +1,32 @@ +{"train_file": "data/metadata_train_hindustani.json", "validation_file": "data/metadata_test_hindustani.json", "validation_file2": "data/metadata_test_hindustani.json", "test_file": "data/metadata_test_hindustani.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": true, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 4, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1742452722", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"} + +{"epoch": [1], "step": 1565, "train_loss": 0.7361, "val_loss": 0.6453, "val_loss2": 0.6451, "_timestamp": 1742463550, "_runtime": 10827} + +{"epoch": [2], "step": 3130, "train_loss": 0.7277, "val_loss": 0.6425, "val_loss2": 0.643, "_timestamp": 1742481762, "_runtime": 29039} + +{"epoch": [3], "step": 4695, "train_loss": 0.7257, "val_loss": 0.6413, "val_loss2": 0.6413, "_timestamp": 1742500242, "_runtime": 47519} + +{"epoch": [4], "step": 6260, "train_loss": 0.7237, "val_loss": 0.6405, "val_loss2": 0.6402, "_timestamp": 1742518376, "_runtime": 65653} + +{"epoch": [5], "step": 7825, "train_loss": 0.724, "val_loss": 0.6395, "val_loss2": 0.6394, "_timestamp": 1742536089, "_runtime": 83366} + +{"epoch": [6], "step": 9390, "train_loss": 0.7224, "val_loss": 0.6391, "val_loss2": 0.639, "_timestamp": 1742553681, "_runtime": 100958} + +{"epoch": [7], "step": 10955, "train_loss": 0.7225, "val_loss": 0.6384, "val_loss2": 0.6382, "_timestamp": 1742571932, "_runtime": 119209} + +{"epoch": [8], "step": 12520, "train_loss": 0.7227, "val_loss": 0.6381, "val_loss2": 0.6383, "_timestamp": 1742590268, "_runtime": 137545} + +{"epoch": [9], "step": 14085, "train_loss": 0.7223, "val_loss": 0.6375, "val_loss2": 0.6378, "_timestamp": 1742607987, "_runtime": 155264} + +{"epoch": [10], "step": 15650, "train_loss": 0.7207, "val_loss": 0.6369, "val_loss2": 0.6369, "_timestamp": 1742625742, "_runtime": 173019} + +{"epoch": [11], "step": 17215, "train_loss": 0.7204, "val_loss": 0.6371, "val_loss2": 0.6369, "_timestamp": 1742643602, "_runtime": 190879} + +{"epoch": [12], "step": 18780, "train_loss": 0.7207, "val_loss": 0.6364, "val_loss2": 0.6367, "_timestamp": 1742661425, "_runtime": 208702} + +{"epoch": [13], "step": 20345, "train_loss": 0.72, "val_loss": 0.6367, "val_loss2": 0.6367, "_timestamp": 1742678826, "_runtime": 226103} + +{"epoch": [14], "step": 21910, "train_loss": 0.7192, "val_loss": 0.636, "val_loss2": 0.6358, "_timestamp": 1742696237, "_runtime": 243514} + +{"epoch": [15], "step": 23475, "train_loss": 0.72, "val_loss": 0.6361, "val_loss2": 0.6358, "_timestamp": 1742713587, "_runtime": 260864} + diff --git a/Hindustani-8M-Transformer/best_adapters.pt b/Hindustani-8M-Transformer/best_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..048bbcc19e7c88192efc951962b1224dc4431780 --- /dev/null +++ b/Hindustani-8M-Transformer/best_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:542e668ca30268ff9c0005fe16de2b880c26c2b9ce46aacfaf40aa88bbdd017a +size 36565318 diff --git a/Hindustani-8M-Transformer/epoch_10_adapters.pt b/Hindustani-8M-Transformer/epoch_10_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..7ca195aded9822ae77849d715a40004f41db5196 --- /dev/null +++ b/Hindustani-8M-Transformer/epoch_10_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62152d880c5d883214a6429b6ae3c46d15f2bcbeffdf4808665f1f0ffd90a11f +size 36566034 diff --git a/Hindustani-8M-Transformer/epoch_20_adapters.pt b/Hindustani-8M-Transformer/epoch_20_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..610213ec31d36b1c4dd2fa61003b66ab9374175c --- /dev/null +++ b/Hindustani-8M-Transformer/epoch_20_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13092ebbcfe7261fb5bf9e9533bef0f1008b9d8dbe63c298be983bdcebc47c03 +size 36566034 diff --git a/Hindustani-8M-Transformer/epoch_30_adapters.pt b/Hindustani-8M-Transformer/epoch_30_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..43454c735cdccd3d47e0c9ec6381e0040d88da5c --- /dev/null +++ b/Hindustani-8M-Transformer/epoch_30_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c89cc516c7aa346f8f7eed31fe46ac24063e0b627785a76e6ae7a07691d325a5 +size 36566034 diff --git a/Hindustani-8M-Transformer/summary.jsonl b/Hindustani-8M-Transformer/summary.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b0543b274234a78350dfcd296e71fb16f1619a1b --- /dev/null +++ b/Hindustani-8M-Transformer/summary.jsonl @@ -0,0 +1,76 @@ +{"train_file": "data/metadata_train_hindustani.json", "validation_file": "data/metadata_test_hindustani.json", "validation_file2": "data/metadata_test_hindustani.json", "test_file": "data/metadata_test_hindustani.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": true, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 4, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1741086279", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"} + +{"epoch": [1], "step": 1565, "train_loss": 0.737, "val_loss": 0.6425, "val_loss2": 0.6423, "_timestamp": 1741093953, "_runtime": 7673} + +{"epoch": [2], "step": 3130, "train_loss": 0.7272, "val_loss": 0.6386, "val_loss2": 0.6388, "_timestamp": 1741105095, "_runtime": 18815} + +{"epoch": [3], "step": 4695, "train_loss": 0.7246, "val_loss": 0.6368, "val_loss2": 0.6366, "_timestamp": 1741116458, "_runtime": 30178} + +{"epoch": [4], "step": 6260, "train_loss": 0.7219, "val_loss": 0.6357, "val_loss2": 0.6355, "_timestamp": 1741127983, "_runtime": 41703} + +{"epoch": [5], "step": 7825, "train_loss": 0.7217, "val_loss": 0.6346, "val_loss2": 0.6347, "_timestamp": 1741139501, "_runtime": 53221} + +{"epoch": [6], "step": 9390, "train_loss": 0.7211, "val_loss": 0.6338, "val_loss2": 0.6339, "_timestamp": 1741151021, "_runtime": 64741} + +{"epoch": [7], "step": 10955, "train_loss": 0.7196, "val_loss": 0.6333, "val_loss2": 0.6333, "_timestamp": 1741163406, "_runtime": 77126} + +{"epoch": [8], "step": 12520, "train_loss": 0.7205, "val_loss": 0.6328, "val_loss2": 0.6328, "_timestamp": 1741174640, "_runtime": 88360} + +{"epoch": [9], "step": 14085, "train_loss": 0.7196, "val_loss": 0.6326, "val_loss2": 0.6326, "_timestamp": 1741186128, "_runtime": 99848} + +{"epoch": [10], "step": 15650, "train_loss": 0.7183, "val_loss": 0.6321, "val_loss2": 0.6321, "_timestamp": 1741197738, "_runtime": 111458} + +{"epoch": [11], "step": 17215, "train_loss": 0.7183, "val_loss": 0.6319, "val_loss2": 0.6317, "_timestamp": 1741209314, "_runtime": 123034} + +{"epoch": [12], "step": 18780, "train_loss": 0.7189, "val_loss": 0.6313, "val_loss2": 0.6315, "_timestamp": 1741220875, "_runtime": 134595} + +{"epoch": [13], "step": 20345, "train_loss": 0.7184, "val_loss": 0.6316, "val_loss2": 0.6314, "_timestamp": 1741232412, "_runtime": 146132} + +{"epoch": [14], "step": 21910, "train_loss": 0.7175, "val_loss": 0.6312, "val_loss2": 0.6309, "_timestamp": 1741243935, "_runtime": 157655} + +{"epoch": [15], "step": 23475, "train_loss": 0.7182, "val_loss": 0.6309, "val_loss2": 0.6306, "_timestamp": 1741255173, "_runtime": 168893} + +{"epoch": [16], "step": 25040, "train_loss": 0.7178, "val_loss": 0.6305, "val_loss2": 0.6306, "_timestamp": 1741266408, "_runtime": 180128} + +{"epoch": [17], "step": 26605, "train_loss": 0.7172, "val_loss": 0.6304, "val_loss2": 0.6305, "_timestamp": 1741277843, "_runtime": 191563} + +{"epoch": [18], "step": 28170, "train_loss": 0.7168, "val_loss": 0.63, "val_loss2": 0.6302, "_timestamp": 1741289372, "_runtime": 203092} + +{"epoch": [19], "step": 29735, "train_loss": 0.7151, "val_loss": 0.6301, "val_loss2": 0.63, "_timestamp": 1741300958, "_runtime": 214678} + +{"epoch": [20], "step": 31300, "train_loss": 0.7164, "val_loss": 0.6296, "val_loss2": 0.6298, "_timestamp": 1741312540, "_runtime": 226260} + +{"epoch": [21], "step": 32865, "train_loss": 0.7183, "val_loss": 0.6298, "val_loss2": 0.6298, "_timestamp": 1741324094, "_runtime": 237814} + +{"epoch": [22], "step": 34430, "train_loss": 0.7142, "val_loss": 0.6297, "val_loss2": 0.6296, "_timestamp": 1741335262, "_runtime": 248982} + +{"epoch": [23], "step": 35995, "train_loss": 0.7173, "val_loss": 0.6294, "val_loss2": 0.6293, "_timestamp": 1741346585, "_runtime": 260305} + +{"epoch": [24], "step": 37560, "train_loss": 0.7155, "val_loss": 0.6293, "val_loss2": 0.6292, "_timestamp": 1741358197, "_runtime": 271917} + +{"epoch": [25], "step": 39125, "train_loss": 0.715, "val_loss": 0.6293, "val_loss2": 0.6294, "_timestamp": 1741369927, "_runtime": 283647} + +{"epoch": [26], "step": 40690, "train_loss": 0.7161, "val_loss": 0.6289, "val_loss2": 0.6292, "_timestamp": 1741381523, "_runtime": 295243} + +{"epoch": [27], "step": 42255, "train_loss": 0.7151, "val_loss": 0.6289, "val_loss2": 0.629, "_timestamp": 1741393101, "_runtime": 306821} + +{"epoch": [28], "step": 43820, "train_loss": 0.716, "val_loss": 0.6287, "val_loss2": 0.6287, "_timestamp": 1741405033, "_runtime": 318753} + +{"epoch": [29], "step": 45385, "train_loss": 0.7151, "val_loss": 0.6287, "val_loss2": 0.6289, "_timestamp": 1741416612, "_runtime": 330332} + +{"epoch": [30], "step": 46950, "train_loss": 0.7158, "val_loss": 0.6289, "val_loss2": 0.6286, "_timestamp": 1741428200, "_runtime": 341920} + +{"epoch": [31], "step": 48515, "train_loss": 0.7143, "val_loss": 0.6283, "val_loss2": 0.6287, "_timestamp": 1741440185, "_runtime": 353905} + +{"epoch": [32], "step": 50080, "train_loss": 0.7147, "val_loss": 0.6285, "val_loss2": 0.6285, "_timestamp": 1741452877, "_runtime": 366597} + +{"epoch": [33], "step": 51645, "train_loss": 0.7141, "val_loss": 0.6286, "val_loss2": 0.6284, "_timestamp": 1741464341, "_runtime": 378061} + +{"epoch": [34], "step": 53210, "train_loss": 0.7144, "val_loss": 0.6282, "val_loss2": 0.6283, "_timestamp": 1741476021, "_runtime": 389741} + +{"epoch": [35], "step": 54775, "train_loss": 0.7142, "val_loss": 0.6284, "val_loss2": 0.6281, "_timestamp": 1741488730, "_runtime": 402450} + +{"epoch": [36], "step": 56340, "train_loss": 0.7135, "val_loss": 0.628, "val_loss2": 0.628, "_timestamp": 1741501554, "_runtime": 415274} + +{"epoch": [37], "step": 57905, "train_loss": 0.7126, "val_loss": 0.6281, "val_loss2": 0.6282, "_timestamp": 1741514134, "_runtime": 427854} + diff --git a/Makam-20M-CNN/best_adapters.pt b/Makam-20M-CNN/best_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..3c6bba0d9a37c6bbe64768a4647d30daf907585c --- /dev/null +++ b/Makam-20M-CNN/best_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:589da21dc6da733e8eb4d386684429eee3aae4a2adf90164c13709493fcb83ed +size 89348293 diff --git a/Makam-20M-CNN/epoch_10_adapters.pt b/Makam-20M-CNN/epoch_10_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..8b02fb4c92a8cb6c8f89b9e5667afb7a73b5f8b5 --- /dev/null +++ b/Makam-20M-CNN/epoch_10_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b145760b79ed6a52b82da384721b29542556a594401c7c5ccc7d8bfb237dfde7 +size 89348805 diff --git a/Makam-20M-CNN/summary.jsonl b/Makam-20M-CNN/summary.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..484800ec53a48ac3f9ab459a8e4c22685cbc4a27 --- /dev/null +++ b/Makam-20M-CNN/summary.jsonl @@ -0,0 +1,26 @@ +{"train_file": "data/metadata_train_makam.json", "validation_file": "data/metadata_val_makam.json", "validation_file2": "data/metadata_test_makam.json", "test_file": "data/metadata_test_makam.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": false, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 2, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1743793947", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"} + +{"epoch": 1, "step": 2356, "train_loss": 0.7477, "val_loss": 0.6585, "val_loss2": 0.657} + +{"epoch": 2, "step": 4712, "train_loss": 0.7414, "val_loss": 0.6555, "val_loss2": 0.6554} + +{"epoch": 3, "step": 7068, "train_loss": 0.7397, "val_loss": 0.6555, "val_loss2": 0.6541} + +{"epoch": 4, "step": 9424, "train_loss": 0.7382, "val_loss": 0.6536, "val_loss2": 0.6532} + +{"epoch": 5, "step": 11780, "train_loss": 0.7384, "val_loss": 0.6538, "val_loss2": 0.6533} + +{"epoch": 6, "step": 14136, "train_loss": 0.7358, "val_loss": 0.6533, "val_loss2": 0.6524} + +{"epoch": 7, "step": 16492, "train_loss": 0.738, "val_loss": 0.6523, "val_loss2": 0.6522} + +{"epoch": 8, "step": 18848, "train_loss": 0.7383, "val_loss": 0.6533, "val_loss2": 0.6516} + +{"epoch": 9, "step": 21204, "train_loss": 0.7368, "val_loss": 0.6519, "val_loss2": 0.6515} + +{"epoch": 10, "step": 23560, "train_loss": 0.734, "val_loss": 0.6517, "val_loss2": 0.6511} + +{"epoch": 11, "step": 25916, "train_loss": 0.7363, "val_loss": 0.652, "val_loss2": 0.6507} + +{"epoch": 12, "step": 28272, "train_loss": 0.734, "val_loss": 0.6503, "val_loss2": 0.6502} + diff --git a/Makam-2M-CNN/best_adapters.pt b/Makam-2M-CNN/best_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe2b351d253d1ca9671eb159ba22d478e14f357c --- /dev/null +++ b/Makam-2M-CNN/best_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b24adf7ac33e9178f7b82dc1add0f16f43a3acdfa5fdb8a661518865b66f6fc +size 6025775 diff --git a/Makam-2M-CNN/epoch_10_adapters.pt b/Makam-2M-CNN/epoch_10_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..955e525e814825e66960d0688896dbc367d094e1 --- /dev/null +++ b/Makam-2M-CNN/epoch_10_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5641481689053fefdd6d4e9c9d9147e30ac323e3f6f758126f3a3cb0cdd8147e +size 6025951 diff --git a/Makam-2M-CNN/summary.jsonl b/Makam-2M-CNN/summary.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..314092ddd752378a72e910f6ffbf9102576a5bc1 --- /dev/null +++ b/Makam-2M-CNN/summary.jsonl @@ -0,0 +1,26 @@ +{"train_file": "data/metadata_train_makam.json", "validation_file": "data/metadata_val_makam.json", "validation_file2": "data/metadata_test_makam.json", "test_file": "data/metadata_test_makam.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": false, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 2, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1744091442", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"} + +{"epoch": 1, "step": 2356, "train_loss": 0.7514, "val_loss": 0.6619, "val_loss2": 0.6603} + +{"epoch": 2, "step": 4712, "train_loss": 0.7445, "val_loss": 0.6591, "val_loss2": 0.659} + +{"epoch": 3, "step": 7068, "train_loss": 0.7422, "val_loss": 0.6582, "val_loss2": 0.6578} + +{"epoch": 4, "step": 9424, "train_loss": 0.7414, "val_loss": 0.6579, "val_loss2": 0.6574} + +{"epoch": 5, "step": 11780, "train_loss": 0.7421, "val_loss": 0.658, "val_loss2": 0.657} + +{"epoch": 6, "step": 14136, "train_loss": 0.7414, "val_loss": 0.657, "val_loss2": 0.6564} + +{"epoch": 7, "step": 16492, "train_loss": 0.7419, "val_loss": 0.6562, "val_loss2": 0.6563} + +{"epoch": 8, "step": 18848, "train_loss": 0.742, "val_loss": 0.6557, "val_loss2": 0.656} + +{"epoch": 9, "step": 21204, "train_loss": 0.7416, "val_loss": 0.6566, "val_loss2": 0.6558} + +{"epoch": 10, "step": 23560, "train_loss": 0.739, "val_loss": 0.6556, "val_loss2": 0.6556} + +{"epoch": 11, "step": 25916, "train_loss": 0.7415, "val_loss": 0.6557, "val_loss2": 0.6553} + +{"epoch": 12, "step": 28272, "train_loss": 0.7391, "val_loss": 0.6554, "val_loss2": 0.6549} + diff --git a/Makam-40M-CNN/best_adapters.pt b/Makam-40M-CNN/best_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..a791d2f9fc4a608d69756b3d52e30830ed0a2df2 --- /dev/null +++ b/Makam-40M-CNN/best_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ccb7b736a623fa20ec16be2ad8c52366336f26aa59089609cc19004adad4325 +size 178445189 diff --git a/Makam-40M-CNN/epoch_10_adapters.pt b/Makam-40M-CNN/epoch_10_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..58e56913a131ceca7b0f8a596a9fd29c3998fec3 --- /dev/null +++ b/Makam-40M-CNN/epoch_10_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6726e1df1e7d575cba093b05e7442423e5e6c62721fa04796fbe1a6e43c8990a +size 178445701 diff --git a/Makam-40M-CNN/summary.jsonl b/Makam-40M-CNN/summary.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3fa3c4624d742c416c1a1f55e020aea65b8de4ac --- /dev/null +++ b/Makam-40M-CNN/summary.jsonl @@ -0,0 +1,28 @@ +{"train_file": "data/metadata_train_makam.json", "validation_file": "data/metadata_val_makam.json", "validation_file2": "data/metadata_test_makam.json", "test_file": "data/metadata_test_makam.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": false, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 2, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1743635059", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"} + +{"epoch": 1, "step": 2356, "train_loss": 0.7469, "val_loss": 0.6584, "val_loss2": 0.6558} + +{"epoch": 2, "step": 4712, "train_loss": 0.7405, "val_loss": 0.6542, "val_loss2": 0.6542} + +{"epoch": 3, "step": 7068, "train_loss": 0.7387, "val_loss": 0.6529, "val_loss2": 0.6528} + +{"epoch": 4, "step": 9424, "train_loss": 0.7368, "val_loss": 0.6534, "val_loss2": 0.6522} + +{"epoch": 5, "step": 11780, "train_loss": 0.7369, "val_loss": 0.6515, "val_loss2": 0.6518} + +{"epoch": 6, "step": 14136, "train_loss": 0.736, "val_loss": 0.6507, "val_loss2": 0.6511} + +{"epoch": 7, "step": 16492, "train_loss": 0.7362, "val_loss": 0.6521, "val_loss2": 0.6509} + +{"epoch": 8, "step": 18848, "train_loss": 0.7364, "val_loss": 0.6498, "val_loss2": 0.6505} + +{"epoch": 9, "step": 21204, "train_loss": 0.7355, "val_loss": 0.6505, "val_loss2": 0.6502} + +{"epoch": 10, "step": 23560, "train_loss": 0.7335, "val_loss": 0.6499, "val_loss2": 0.6497} + +{"epoch": 11, "step": 25916, "train_loss": 0.7342, "val_loss": 0.649, "val_loss2": 0.6493} + +{"epoch": 12, "step": 28272, "train_loss": 0.7328, "val_loss": 0.6488, "val_loss2": 0.6491} + +{"epoch": 13, "step": 30628, "train_loss": 0.7329, "val_loss": 0.6488, "val_loss2": 0.649} + diff --git a/Makam-40M-Transformer/best_adapters.pt b/Makam-40M-Transformer/best_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..5667bca34023f105ef279b304a92fc23e0fe9cd3 --- /dev/null +++ b/Makam-40M-Transformer/best_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b28940e3aa8bbaa5c89f16f590b24a07b1a6d5eb149c12103d528df20e917fa +size 146260733 diff --git a/Makam-40M-Transformer/epoch_10_adapters.pt b/Makam-40M-Transformer/epoch_10_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..8e7d99c8da5697b6a44abb19c927da3f7cc05c37 --- /dev/null +++ b/Makam-40M-Transformer/epoch_10_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0d6e85d3826cb11c10c5d7d5b4c1ec87fa1e4b313a37664cff941bf0153480e +size 146263541 diff --git a/Makam-40M-Transformer/summary.jsonl b/Makam-40M-Transformer/summary.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6d4b2bae46bbff737571f66c4fd4b7660a282223 --- /dev/null +++ b/Makam-40M-Transformer/summary.jsonl @@ -0,0 +1,24 @@ +{"train_file": "data/metadata_train_makam.json", "validation_file": "data/metadata_val_makam.json", "validation_file2": "data/metadata_test_makam.json", "test_file": "data/metadata_test_makam.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": false, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 2, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1743409928", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"} + +{"epoch": 1, "step": 2356, "train_loss": 0.7545, "val_loss": 0.661, "val_loss2": 0.6601} + +{"epoch": 2, "step": 4712, "train_loss": 0.7435, "val_loss": 0.6579, "val_loss2": 0.6572} + +{"epoch": 3, "step": 7068, "train_loss": 0.7413, "val_loss": 0.6568, "val_loss2": 0.6559} + +{"epoch": 4, "step": 9424, "train_loss": 0.7389, "val_loss": 0.655, "val_loss2": 0.6548} + +{"epoch": 5, "step": 11780, "train_loss": 0.7394, "val_loss": 0.6542, "val_loss2": 0.6537} + +{"epoch": 6, "step": 14136, "train_loss": 0.7377, "val_loss": 0.6541, "val_loss2": 0.6532} + +{"epoch": 7, "step": 16492, "train_loss": 0.7388, "val_loss": 0.6526, "val_loss2": 0.6528} + +{"epoch": 8, "step": 18848, "train_loss": 0.7391, "val_loss": 0.6531, "val_loss2": 0.6525} + +{"epoch": 9, "step": 21204, "train_loss": 0.7373, "val_loss": 0.6523, "val_loss2": 0.6522} + +{"epoch": 10, "step": 23560, "train_loss": 0.7355, "val_loss": 0.6517, "val_loss2": 0.6518} + +{"epoch": 11, "step": 25916, "train_loss": 0.7366, "val_loss": 0.6516, "val_loss2": 0.6517} + diff --git a/Makam-70M-CNN/best_adapters.pt b/Makam-70M-CNN/best_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..e82ef18a87f39f6cd3c649709f078034901ed691 --- /dev/null +++ b/Makam-70M-CNN/best_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c46e98bb13cced40257758f1aa2a734849eff7b693bb6d2c8ab7954b1293b73 +size 237758893 diff --git a/Makam-70M-CNN/epoch_10_adapters.pt b/Makam-70M-CNN/epoch_10_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d85abb34777cac3ea9579211b62a991646b15d5 --- /dev/null +++ b/Makam-70M-CNN/epoch_10_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ccfa3d5fc162e769f0a329d7514e22612410054c99c4d8d1785f15d0d91932f +size 237759237 diff --git a/Makam-70M-CNN/summary.jsonl b/Makam-70M-CNN/summary.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e38fab0982127767dde1a7308a37e07c66f1c9cb --- /dev/null +++ b/Makam-70M-CNN/summary.jsonl @@ -0,0 +1,32 @@ +{"train_file": "data/metadata_train_makam.json", "validation_file": "data/metadata_val_makam.json", "validation_file2": "data/metadata_test_makam.json", "test_file": "data/metadata_test_makam.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": true, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 2, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1742539331", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"} + +{"epoch": [1], "step": 2356, "train_loss": 0.7458, "val_loss": 0.6554, "val_loss2": 0.655} + +{"epoch": [2], "step": 4712, "train_loss": 0.7402, "val_loss": 0.6537, "val_loss2": 0.6532} + +{"epoch": [3], "step": 7068, "train_loss": 0.7373, "val_loss": 0.652, "val_loss2": 0.652} + +{"epoch": [4], "step": 9424, "train_loss": 0.7362, "val_loss": 0.6519, "val_loss2": 0.6512} + +{"epoch": [5], "step": 11780, "train_loss": 0.7357, "val_loss": 0.6516, "val_loss2": 0.6504} + +{"epoch": [6], "step": 14136, "train_loss": 0.7348, "val_loss": 0.6512, "val_loss2": 0.6502} + +{"epoch": [7], "step": 16492, "train_loss": 0.7347, "val_loss": 0.6502, "val_loss2": 0.6496} + +{"epoch": [8], "step": 18848, "train_loss": 0.7351, "val_loss": 0.6483, "val_loss2": 0.649} + +{"epoch": [9], "step": 21204, "train_loss": 0.734, "val_loss": 0.6492, "val_loss2": 0.6488} + +{"epoch": [10], "step": 23560, "train_loss": 0.7314, "val_loss": 0.6481, "val_loss2": 0.6482} + +{"epoch": [11], "step": 25916, "train_loss": 0.7328, "val_loss": 0.6477, "val_loss2": 0.6482} + +{"epoch": [12], "step": 28272, "train_loss": 0.731, "val_loss": 0.6473, "val_loss2": 0.6475} + +{"epoch": [13], "step": 30628, "train_loss": 0.7315, "val_loss": 0.648, "val_loss2": 0.6476} + +{"epoch": [14], "step": 32984, "train_loss": 0.7321, "val_loss": 0.6491, "val_loss2": 0.6473} + +{"epoch": [15], "step": 35340, "train_loss": 0.7308, "val_loss": 0.6472, "val_loss2": 0.6471} + diff --git a/Makam-70M-Transformer/best_adapters.pt b/Makam-70M-Transformer/best_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b0b141f8cadad5e8cc77ef1abae3e084fec3283 --- /dev/null +++ b/Makam-70M-Transformer/best_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08eeaab800a1511a2ce78db933b6cdd805cfb6c57fb8efd47561bbcf49514f46 +size 292523194 diff --git a/Makam-70M-Transformer/epoch_10_adapters.pt b/Makam-70M-Transformer/epoch_10_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8769158e1b28a04fdf7c181242fee58d15f4205 --- /dev/null +++ b/Makam-70M-Transformer/epoch_10_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8695a028a169066c9db0c8427aa1cc177306da4f6c1568a5a3b639ff36583c1 +size 292528810 diff --git a/Makam-70M-Transformer/summary.jsonl b/Makam-70M-Transformer/summary.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..35156666a67648409c6dfebbca56d527332cf746 --- /dev/null +++ b/Makam-70M-Transformer/summary.jsonl @@ -0,0 +1,28 @@ +{"train_file": "data/metadata_train_makam.json", "validation_file": "data/metadata_val_makam.json", "validation_file2": "data/metadata_test_makam.json", "test_file": "data/metadata_test_makam.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": true, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 2, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1741802495", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"} + +{"epoch": [1], "step": 2356, "train_loss": 0.7555, "val_loss": 0.6576, "val_loss2": 0.6576, "_timestamp": 1741815555, "_runtime": 13060} + +{"epoch": [2], "step": 4712, "train_loss": 0.7415, "val_loss": 0.6543, "val_loss2": 0.6547, "_timestamp": 1741828919, "_runtime": 26424} + +{"epoch": [3], "step": 7068, "train_loss": 0.739, "val_loss": 0.6531, "val_loss2": 0.6528, "_timestamp": 1741844156, "_runtime": 41661} + +{"epoch": [4], "step": 9424, "train_loss": 0.737, "val_loss": 0.6517, "val_loss2": 0.6522, "_timestamp": 1741859740, "_runtime": 57245} + +{"epoch": [5], "step": 11780, "train_loss": 0.7377, "val_loss": 0.6509, "val_loss2": 0.651, "_timestamp": 1741880157, "_runtime": 77662} + +{"epoch": [6], "step": 14136, "train_loss": 0.7356, "val_loss": 0.6504, "val_loss2": 0.6506, "_timestamp": 1741896167, "_runtime": 93672} + +{"epoch": [7], "step": 16492, "train_loss": 0.7361, "val_loss": 0.6494, "val_loss2": 0.6499, "_timestamp": 1741911385, "_runtime": 108890} + +{"epoch": [8], "step": 18848, "train_loss": 0.736, "val_loss": 0.6494, "val_loss2": 0.6496, "_timestamp": 1741925596, "_runtime": 123101} + +{"epoch": [9], "step": 21204, "train_loss": 0.735, "val_loss": 0.6482, "val_loss2": 0.6488, "_timestamp": 1741939137, "_runtime": 136642} + +{"epoch": [10], "step": 23560, "train_loss": 0.7326, "val_loss": 0.6482, "val_loss2": 0.6488, "_timestamp": 1741958327, "_runtime": 155832} + +{"epoch": [11], "step": 25916, "train_loss": 0.7342, "val_loss": 0.6477, "val_loss2": 0.6482, "_timestamp": 1741980523, "_runtime": 178028} + +{"epoch": [12], "step": 28272, "train_loss": 0.7324, "val_loss": 0.6474, "val_loss2": 0.6478, "_timestamp": 1741995386, "_runtime": 192891} + +{"epoch": [13], "step": 30628, "train_loss": 0.7319, "val_loss": 0.648, "val_loss2": 0.648, "_timestamp": 1742011261, "_runtime": 208766} + diff --git a/Makam-8M-CNN/best_adapters.pt b/Makam-8M-CNN/best_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..55cd30f42e05f4ac1c0db463782c29a7c0c7c4e9 --- /dev/null +++ b/Makam-8M-CNN/best_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14874d4ee64940776deaf3d1754f853625eafb449945b338144f991037a20254 +size 30130514 diff --git a/Makam-8M-CNN/epoch_10_adapters.pt b/Makam-8M-CNN/epoch_10_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..33c078484f32caf43d48071653447bcc3f014a09 --- /dev/null +++ b/Makam-8M-CNN/epoch_10_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12f8e6a9bc87c239542333967b03c36f737ee472d2c236adbf0e9608960e1c5a +size 30131370 diff --git a/Makam-8M-CNN/summary.jsonl b/Makam-8M-CNN/summary.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..bdec04c0c4060fe5e4933c3bba52fe4f21ee27bf --- /dev/null +++ b/Makam-8M-CNN/summary.jsonl @@ -0,0 +1,24 @@ +{"train_file": "data/metadata_train_makam.json", "validation_file": "data/metadata_val_makam.json", "validation_file2": "data/metadata_test_makam.json", "test_file": "data/metadata_test_makam.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": true, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 2, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1742296544", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"} + +{"epoch": [1], "step": 2356, "train_loss": 0.7479, "val_loss": 0.6551, "val_loss2": 0.655, "_timestamp": 1742305448, "_runtime": 8904} + +{"epoch": [2], "step": 4712, "train_loss": 0.7401, "val_loss": 0.6537, "val_loss2": 0.654, "_timestamp": 1742317595, "_runtime": 21051} + +{"epoch": [3], "step": 7068, "train_loss": 0.7388, "val_loss": 0.653, "val_loss2": 0.653, "_timestamp": 1742325830, "_runtime": 29286} + +{"epoch": [4], "step": 9424, "train_loss": 0.7368, "val_loss": 0.6514, "val_loss2": 0.6518, "_timestamp": 1742333613, "_runtime": 37069} + +{"epoch": [5], "step": 11780, "train_loss": 0.7381, "val_loss": 0.6513, "val_loss2": 0.6515, "_timestamp": 1742341365, "_runtime": 44821} + +{"epoch": [6], "step": 14136, "train_loss": 0.736, "val_loss": 0.6507, "val_loss2": 0.6509, "_timestamp": 1742349126, "_runtime": 52582} + +{"epoch": [7], "step": 16492, "train_loss": 0.7367, "val_loss": 0.6502, "val_loss2": 0.6506, "_timestamp": 1742356897, "_runtime": 60353} + +{"epoch": [8], "step": 18848, "train_loss": 0.7369, "val_loss": 0.6506, "val_loss2": 0.6509, "_timestamp": 1742365089, "_runtime": 68545} + +{"epoch": [9], "step": 21204, "train_loss": 0.7364, "val_loss": 0.6499, "val_loss2": 0.6502, "_timestamp": 1742375265, "_runtime": 78721} + +{"epoch": [10], "step": 23560, "train_loss": 0.7334, "val_loss": 0.6494, "val_loss2": 0.6499, "_timestamp": 1742387582, "_runtime": 91038} + +{"epoch": [11], "step": 25916, "train_loss": 0.7356, "val_loss": 0.6497, "val_loss2": 0.6495, "_timestamp": 1742396375, "_runtime": 99831} + diff --git a/Makam-8M-Transformer/best_adapters.pt b/Makam-8M-Transformer/best_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..f86912ac4d3ab02123e6c11518da9fc46b86971e --- /dev/null +++ b/Makam-8M-Transformer/best_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a77aa8783134c16fcd75b6cd02c891e0bc6f157be2c0e8c283addc2410ad770d +size 36564873 diff --git a/Makam-8M-Transformer/epoch_10_adapters.pt b/Makam-8M-Transformer/epoch_10_adapters.pt new file mode 100644 index 0000000000000000000000000000000000000000..aa94c5e197336b505557be765c85108fffd3ae4c --- /dev/null +++ b/Makam-8M-Transformer/epoch_10_adapters.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00696e109edc9cda31372e34ec2f40095292b07cbd4aea1c38ff9e688109757d +size 36565581 diff --git a/Makam-8M-Transformer/summary.jsonl b/Makam-8M-Transformer/summary.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a8718d54308635eb09081561ed4d4dc596652e30 --- /dev/null +++ b/Makam-8M-Transformer/summary.jsonl @@ -0,0 +1,26 @@ +{"train_file": "data/metadata_train_makam.json", "validation_file": "data/metadata_val_makam.json", "validation_file2": "data/metadata_test_makam.json", "test_file": "data/metadata_test_makam.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": false, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 2, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1743925785", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"} + +{"epoch": 1, "step": 2356, "train_loss": 0.7498, "val_loss": 0.6595, "val_loss2": 0.6586} + +{"epoch": 2, "step": 4712, "train_loss": 0.7426, "val_loss": 0.6572, "val_loss2": 0.657} + +{"epoch": 3, "step": 7068, "train_loss": 0.7411, "val_loss": 0.6564, "val_loss2": 0.6558} + +{"epoch": 4, "step": 9424, "train_loss": 0.7392, "val_loss": 0.6555, "val_loss2": 0.6551} + +{"epoch": 5, "step": 11780, "train_loss": 0.7398, "val_loss": 0.6559, "val_loss2": 0.6544} + +{"epoch": 6, "step": 14136, "train_loss": 0.7386, "val_loss": 0.654, "val_loss2": 0.654} + +{"epoch": 7, "step": 16492, "train_loss": 0.7393, "val_loss": 0.655, "val_loss2": 0.6538} + +{"epoch": 8, "step": 18848, "train_loss": 0.7394, "val_loss": 0.6544, "val_loss2": 0.6534} + +{"epoch": 9, "step": 21204, "train_loss": 0.7386, "val_loss": 0.6538, "val_loss2": 0.6533} + +{"epoch": 10, "step": 23560, "train_loss": 0.7362, "val_loss": 0.6535, "val_loss2": 0.6531} + +{"epoch": 11, "step": 25916, "train_loss": 0.7374, "val_loss": 0.6544, "val_loss2": 0.6528} + +{"epoch": 12, "step": 28272, "train_loss": 0.7363, "val_loss": 0.654, "val_loss2": 0.6525} +