athi180202 commited on
Commit
07393a7
·
verified ·
1 Parent(s): 21328ed

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Hindustani-20M-CNN/best_adapters.pt +3 -0
  2. Hindustani-20M-CNN/epoch_10_adapters.pt +3 -0
  3. Hindustani-20M-CNN/summary.jsonl +26 -0
  4. Hindustani-20M-Transformer/best_adapters.pt +3 -0
  5. Hindustani-20M-Transformer/epoch_10_adapters.pt +3 -0
  6. Hindustani-20M-Transformer/summary.jsonl +26 -0
  7. Hindustani-2M-CNN/best_adapters.pt +3 -0
  8. Hindustani-2M-CNN/epoch_10_adapters.pt +3 -0
  9. Hindustani-2M-CNN/summary.jsonl +24 -0
  10. Hindustani-2M-Transformer/best_adapters.pt +3 -0
  11. Hindustani-2M-Transformer/epoch_10_adapters.pt +3 -0
  12. Hindustani-2M-Transformer/summary.jsonl +28 -0
  13. Hindustani-40M-CNN/best_adapters.pt +3 -0
  14. Hindustani-40M-CNN/epoch_10_adapters.pt +3 -0
  15. Hindustani-40M-CNN/summary.jsonl +26 -0
  16. Hindustani-40M-Transformer/best_adapters.pt +3 -0
  17. Hindustani-40M-Transformer/epoch_10_adapters.pt +3 -0
  18. Hindustani-40M-Transformer/summary.jsonl +24 -0
  19. Hindustani-70M-CNN/best_adapters.pt +3 -0
  20. Hindustani-70M-CNN/epoch_10_adapters.pt +3 -0
  21. Hindustani-70M-CNN/summary.jsonl +26 -0
  22. Hindustani-70M-Transformer/best_adapters.pt +3 -0
  23. Hindustani-70M-Transformer/epoch_10_adapters.pt +3 -0
  24. Hindustani-70M-Transformer/summary.jsonl +26 -0
  25. Hindustani-8M-CNN/best_adapters.pt +3 -0
  26. Hindustani-8M-CNN/epoch_10_adapters.pt +3 -0
  27. Hindustani-8M-CNN/summary.jsonl +32 -0
  28. Hindustani-8M-Transformer/best_adapters.pt +3 -0
  29. Hindustani-8M-Transformer/epoch_10_adapters.pt +3 -0
  30. Hindustani-8M-Transformer/epoch_20_adapters.pt +3 -0
  31. Hindustani-8M-Transformer/epoch_30_adapters.pt +3 -0
  32. Hindustani-8M-Transformer/summary.jsonl +76 -0
  33. Makam-20M-CNN/best_adapters.pt +3 -0
  34. Makam-20M-CNN/epoch_10_adapters.pt +3 -0
  35. Makam-20M-CNN/summary.jsonl +26 -0
  36. Makam-2M-CNN/best_adapters.pt +3 -0
  37. Makam-2M-CNN/epoch_10_adapters.pt +3 -0
  38. Makam-2M-CNN/summary.jsonl +26 -0
  39. Makam-40M-CNN/best_adapters.pt +3 -0
  40. Makam-40M-CNN/epoch_10_adapters.pt +3 -0
  41. Makam-40M-CNN/summary.jsonl +28 -0
  42. Makam-40M-Transformer/best_adapters.pt +3 -0
  43. Makam-40M-Transformer/epoch_10_adapters.pt +3 -0
  44. Makam-40M-Transformer/summary.jsonl +24 -0
  45. Makam-70M-CNN/best_adapters.pt +3 -0
  46. Makam-70M-CNN/epoch_10_adapters.pt +3 -0
  47. Makam-70M-CNN/summary.jsonl +32 -0
  48. Makam-70M-Transformer/best_adapters.pt +3 -0
  49. Makam-70M-Transformer/epoch_10_adapters.pt +3 -0
  50. Makam-70M-Transformer/summary.jsonl +28 -0
Hindustani-20M-CNN/best_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:563543e44916a0bccb44d2e17c392f44644badaca93842cd35bfe813fc491330
3
+ size 89348293
Hindustani-20M-CNN/epoch_10_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8aad4e02e4c491f85ec60ac0f76aee9e9462ddf340cecf3d1fbd63e68f309de
3
+ size 89348805
Hindustani-20M-CNN/summary.jsonl ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_file": "data/metadata_train_hindustani.json", "validation_file": "data/metadata_test_hindustani.json", "validation_file2": "data/metadata_test_hindustani.json", "test_file": "data/metadata_test_hindustani.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": true, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 4, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1743147931", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"}
2
+
3
+ {"epoch": [1], "step": 1565, "train_loss": 0.7346, "val_loss": 0.6447, "val_loss2": 0.6446, "_timestamp": 1743165321, "_runtime": 17389}
4
+
5
+ {"epoch": [2], "step": 3130, "train_loss": 0.7279, "val_loss": 0.6418, "val_loss2": 0.6419, "_timestamp": 1743189912, "_runtime": 41980}
6
+
7
+ {"epoch": [3], "step": 4695, "train_loss": 0.7247, "val_loss": 0.6399, "val_loss2": 0.6399, "_timestamp": 1743214898, "_runtime": 66966}
8
+
9
+ {"epoch": [4], "step": 6260, "train_loss": 0.7226, "val_loss": 0.6386, "val_loss2": 0.6386, "_timestamp": 1743239210, "_runtime": 91278}
10
+
11
+ {"epoch": [5], "step": 7825, "train_loss": 0.7228, "val_loss": 0.6375, "val_loss2": 0.6375, "_timestamp": 1743263496, "_runtime": 115564}
12
+
13
+ {"epoch": [6], "step": 9390, "train_loss": 0.7202, "val_loss": 0.6366, "val_loss2": 0.6365, "_timestamp": 1743287951, "_runtime": 140019}
14
+
15
+ {"epoch": [7], "step": 10955, "train_loss": 0.7206, "val_loss": 0.636, "val_loss2": 0.6361, "_timestamp": 1743312250, "_runtime": 164318}
16
+
17
+ {"epoch": [8], "step": 12520, "train_loss": 0.7201, "val_loss": 0.6356, "val_loss2": 0.6354, "_timestamp": 1743330721, "_runtime": 182789}
18
+
19
+ {"epoch": [9], "step": 14085, "train_loss": 0.7188, "val_loss": 0.6356, "val_loss2": 0.6355, "_timestamp": 1743354070, "_runtime": 206138}
20
+
21
+ {"epoch": [10], "step": 15650, "train_loss": 0.7185, "val_loss": 0.6344, "val_loss2": 0.6344, "_timestamp": 1743377342, "_runtime": 229410}
22
+
23
+ {"epoch": [11], "step": 17215, "train_loss": 0.7177, "val_loss": 0.6343, "val_loss2": 0.6343, "_timestamp": 1743400710, "_runtime": 252778}
24
+
25
+ {"epoch": [12], "step": 18780, "train_loss": 0.7183, "val_loss": 0.6341, "val_loss2": 0.6342, "_timestamp": 1743424752, "_runtime": 276820}
26
+
Hindustani-20M-Transformer/best_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0065801ff7359194f5193ab694dd7751549f0e70af56d279989bfe2874fe2e74
3
+ size 73130181
Hindustani-20M-Transformer/epoch_10_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a66ff9a7306227b3cfd8c85c1a3d2bff3e566676a97a548cb40d2f8b053faaf
3
+ size 73131589
Hindustani-20M-Transformer/summary.jsonl ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_file": "data/metadata_train_hindustani.json", "validation_file": "data/metadata_test_hindustani.json", "validation_file2": "data/metadata_test_hindustani.json", "test_file": "data/metadata_test_hindustani.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": true, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 4, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1742715616", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"}
2
+
3
+ {"epoch": [1], "step": 1565, "train_loss": 0.7348, "val_loss": 0.6459, "val_loss2": 0.6457, "_timestamp": 1742729556, "_runtime": 13939}
4
+
5
+ {"epoch": [2], "step": 3130, "train_loss": 0.7278, "val_loss": 0.6423, "val_loss2": 0.6425, "_timestamp": 1742750773, "_runtime": 35156}
6
+
7
+ {"epoch": [3], "step": 4695, "train_loss": 0.7251, "val_loss": 0.6406, "val_loss2": 0.6408, "_timestamp": 1742771679, "_runtime": 56062}
8
+
9
+ {"epoch": [4], "step": 6260, "train_loss": 0.7231, "val_loss": 0.6398, "val_loss2": 0.6397, "_timestamp": 1742793020, "_runtime": 77403}
10
+
11
+ {"epoch": [5], "step": 7825, "train_loss": 0.7227, "val_loss": 0.6387, "val_loss2": 0.6388, "_timestamp": 1742813435, "_runtime": 97818}
12
+
13
+ {"epoch": [6], "step": 9390, "train_loss": 0.7212, "val_loss": 0.6375, "val_loss2": 0.6376, "_timestamp": 1742834179, "_runtime": 118562}
14
+
15
+ {"epoch": [7], "step": 10955, "train_loss": 0.721, "val_loss": 0.6372, "val_loss2": 0.6371, "_timestamp": 1742854218, "_runtime": 138601}
16
+
17
+ {"epoch": [8], "step": 12520, "train_loss": 0.7206, "val_loss": 0.6364, "val_loss2": 0.6366, "_timestamp": 1742874289, "_runtime": 158672}
18
+
19
+ {"epoch": [9], "step": 14085, "train_loss": 0.7198, "val_loss": 0.6363, "val_loss2": 0.6362, "_timestamp": 1742894221, "_runtime": 178604}
20
+
21
+ {"epoch": [10], "step": 15650, "train_loss": 0.7191, "val_loss": 0.6355, "val_loss2": 0.6358, "_timestamp": 1742916916, "_runtime": 201299}
22
+
23
+ {"epoch": [11], "step": 17215, "train_loss": 0.7185, "val_loss": 0.6358, "val_loss2": 0.6354, "_timestamp": 1742937243, "_runtime": 221626}
24
+
25
+ {"epoch": [12], "step": 18780, "train_loss": 0.7195, "val_loss": 0.6352, "val_loss2": 0.6356, "_timestamp": 1742957525, "_runtime": 241908}
26
+
Hindustani-2M-CNN/best_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38ad57b0bd253112940c9f27b7c4820db0da6f222069d48a93ff0ad68da2598e
3
+ size 6025775
Hindustani-2M-CNN/epoch_10_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb0ce1f62b84d90dd02f4159fddf34609ded39217629805c0298d2d198204ca9
3
+ size 6025951
Hindustani-2M-CNN/summary.jsonl ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_file": "data/metadata_train_hindustani.json", "validation_file": "data/metadata_test_hindustani.json", "validation_file2": "data/metadata_test_hindustani.json", "test_file": "data/metadata_test_hindustani.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": true, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 4, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1743875307", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"}
2
+
3
+ {"epoch": [1], "step": 1565, "train_loss": 0.7403, "val_loss": 0.6505, "val_loss2": 0.6504, "_timestamp": 1743885783, "_runtime": 10474}
4
+
5
+ {"epoch": [2], "step": 3130, "train_loss": 0.7321, "val_loss": 0.6477, "val_loss2": 0.6479, "_timestamp": 1743902105, "_runtime": 26796}
6
+
7
+ {"epoch": [3], "step": 4695, "train_loss": 0.7297, "val_loss": 0.6466, "val_loss2": 0.6466, "_timestamp": 1743918387, "_runtime": 43078}
8
+
9
+ {"epoch": [4], "step": 6260, "train_loss": 0.728, "val_loss": 0.6458, "val_loss2": 0.6456, "_timestamp": 1743934107, "_runtime": 58798}
10
+
11
+ {"epoch": [5], "step": 7825, "train_loss": 0.7286, "val_loss": 0.6449, "val_loss2": 0.6449, "_timestamp": 1743950645, "_runtime": 75336}
12
+
13
+ {"epoch": [6], "step": 9390, "train_loss": 0.7271, "val_loss": 0.6448, "val_loss2": 0.6445, "_timestamp": 1743967320, "_runtime": 92011}
14
+
15
+ {"epoch": [7], "step": 10955, "train_loss": 0.7268, "val_loss": 0.6439, "val_loss2": 0.6437, "_timestamp": 1743983597, "_runtime": 108288}
16
+
17
+ {"epoch": [8], "step": 12520, "train_loss": 0.7275, "val_loss": 0.6436, "val_loss2": 0.6436, "_timestamp": 1743999422, "_runtime": 124113}
18
+
19
+ {"epoch": [9], "step": 14085, "train_loss": 0.7266, "val_loss": 0.6433, "val_loss2": 0.6432, "_timestamp": 1744015961, "_runtime": 140652}
20
+
21
+ {"epoch": [10], "step": 15650, "train_loss": 0.726, "val_loss": 0.6428, "val_loss2": 0.6429, "_timestamp": 1744031700, "_runtime": 156391}
22
+
23
+ {"epoch": [11], "step": 17215, "train_loss": 0.7253, "val_loss": 0.6429, "val_loss2": 0.6428, "_timestamp": 1744048427, "_runtime": 173118}
24
+
Hindustani-2M-Transformer/best_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be3afda934d4e45fcd20a41807c3ff1279187611dccb3b65a62ae944f1cb5d55
3
+ size 12808073
Hindustani-2M-Transformer/epoch_10_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:773ca7d871ab0a89236a2b40ad9ab2acd2a6a40c9c682642df201d8a59674015
3
+ size 12808781
Hindustani-2M-Transformer/summary.jsonl ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_file": "data/metadata_train_hindustani.json", "validation_file": "data/metadata_test_hindustani.json", "validation_file2": "data/metadata_test_hindustani.json", "test_file": "data/metadata_test_hindustani.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": true, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 4, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1744056337", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"}
2
+
3
+ {"epoch": [1], "step": 1565, "train_loss": 0.7393, "val_loss": 0.6498, "val_loss2": 0.6501, "_timestamp": 1744068610, "_runtime": 12272}
4
+
5
+ {"epoch": [2], "step": 3130, "train_loss": 0.7313, "val_loss": 0.6473, "val_loss2": 0.6476, "_timestamp": 1744087699, "_runtime": 31361}
6
+
7
+ {"epoch": [3], "step": 4695, "train_loss": 0.729, "val_loss": 0.646, "val_loss2": 0.6461, "_timestamp": 1744105752, "_runtime": 49414}
8
+
9
+ {"epoch": [4], "step": 6260, "train_loss": 0.7282, "val_loss": 0.6452, "val_loss2": 0.6453, "_timestamp": 1744124547, "_runtime": 68209}
10
+
11
+ {"epoch": [5], "step": 7825, "train_loss": 0.7278, "val_loss": 0.6442, "val_loss2": 0.6441, "_timestamp": 1744142368, "_runtime": 86030}
12
+
13
+ {"epoch": [6], "step": 9390, "train_loss": 0.7263, "val_loss": 0.6437, "val_loss2": 0.6439, "_timestamp": 1744160135, "_runtime": 103797}
14
+
15
+ {"epoch": [7], "step": 10955, "train_loss": 0.7267, "val_loss": 0.6436, "val_loss2": 0.6429, "_timestamp": 1744177825, "_runtime": 121487}
16
+
17
+ {"epoch": [8], "step": 12520, "train_loss": 0.7264, "val_loss": 0.643, "val_loss2": 0.6432, "_timestamp": 1744195561, "_runtime": 139223}
18
+
19
+ {"epoch": [9], "step": 14085, "train_loss": 0.7259, "val_loss": 0.6427, "val_loss2": 0.6428, "_timestamp": 1744213259, "_runtime": 156921}
20
+
21
+ {"epoch": [10], "step": 15650, "train_loss": 0.7246, "val_loss": 0.6425, "val_loss2": 0.6423, "_timestamp": 1744232117, "_runtime": 175779}
22
+
23
+ {"epoch": [11], "step": 17215, "train_loss": 0.7252, "val_loss": 0.6421, "val_loss2": 0.642, "_timestamp": 1744249856, "_runtime": 193518}
24
+
25
+ {"epoch": [12], "step": 18780, "train_loss": 0.7256, "val_loss": 0.6415, "val_loss2": 0.6417, "_timestamp": 1744269115, "_runtime": 212777}
26
+
27
+ {"epoch": [13], "step": 20345, "train_loss": 0.7239, "val_loss": 0.6421, "val_loss2": 0.6417, "_timestamp": 1744288494, "_runtime": 232156}
28
+
Hindustani-40M-CNN/best_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:553234f8dbd18d880aa0f9bb18323e13b28b7f1d02e467d4512215442833ab7e
3
+ size 178445189
Hindustani-40M-CNN/epoch_10_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bd6df5a64da5e77a2f5fe6e63399344489d1ce28c6c304a0696bb1678ba6ed8
3
+ size 178445701
Hindustani-40M-CNN/summary.jsonl ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_file": "data/metadata_train_hindustani.json", "validation_file": "data/metadata_test_hindustani.json", "validation_file2": "data/metadata_test_hindustani.json", "test_file": "data/metadata_test_hindustani.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": true, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 4, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1743429017", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"}
2
+
3
+ {"epoch": [1], "step": 1565, "train_loss": 0.7329, "val_loss": 0.6431, "val_loss2": 0.6431, "_timestamp": 1743446526, "_runtime": 17508}
4
+
5
+ {"epoch": [2], "step": 3130, "train_loss": 0.726, "val_loss": 0.6397, "val_loss2": 0.6397, "_timestamp": 1743471378, "_runtime": 42360}
6
+
7
+ {"epoch": [3], "step": 4695, "train_loss": 0.7229, "val_loss": 0.6378, "val_loss2": 0.6379, "_timestamp": 1743496402, "_runtime": 67384}
8
+
9
+ {"epoch": [4], "step": 6260, "train_loss": 0.721, "val_loss": 0.6366, "val_loss2": 0.6365, "_timestamp": 1743521515, "_runtime": 92497}
10
+
11
+ {"epoch": [5], "step": 7825, "train_loss": 0.72, "val_loss": 0.6353, "val_loss2": 0.6358, "_timestamp": 1743540759, "_runtime": 111741}
12
+
13
+ {"epoch": [6], "step": 9390, "train_loss": 0.7181, "val_loss": 0.6341, "val_loss2": 0.6341, "_timestamp": 1743557812, "_runtime": 128794}
14
+
15
+ {"epoch": [7], "step": 10955, "train_loss": 0.7173, "val_loss": 0.6341, "val_loss2": 0.6341, "_timestamp": 1743574819, "_runtime": 145801}
16
+
17
+ {"epoch": [8], "step": 12520, "train_loss": 0.7182, "val_loss": 0.6329, "val_loss2": 0.6332, "_timestamp": 1743591569, "_runtime": 162551}
18
+
19
+ {"epoch": [9], "step": 14085, "train_loss": 0.7174, "val_loss": 0.6329, "val_loss2": 0.6328, "_timestamp": 1743608080, "_runtime": 179062}
20
+
21
+ {"epoch": [10], "step": 15650, "train_loss": 0.716, "val_loss": 0.6321, "val_loss2": 0.6319, "_timestamp": 1743625120, "_runtime": 196102}
22
+
23
+ {"epoch": [11], "step": 17215, "train_loss": 0.7151, "val_loss": 0.6322, "val_loss2": 0.6323, "_timestamp": 1743642231, "_runtime": 213213}
24
+
25
+ {"epoch": [12], "step": 18780, "train_loss": 0.7166, "val_loss": 0.631, "val_loss2": 0.6313, "_timestamp": 1743658843, "_runtime": 229825}
26
+
Hindustani-40M-Transformer/best_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18a7cdb4f49bf18aa6a7f0ff1c99f4c18131fa7f72feed821fc916ea690a5a43
3
+ size 146260733
Hindustani-40M-Transformer/epoch_10_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f5b4c16508d686118fd28f96fa8df0250ebc4cc10603473c7c22507199a58d8
3
+ size 146263541
Hindustani-40M-Transformer/summary.jsonl ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_file": "data/metadata_train_hindustani.json", "validation_file": "data/metadata_test_hindustani.json", "validation_file2": "data/metadata_test_hindustani.json", "test_file": "data/metadata_test_hindustani.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": true, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 4, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1742190748", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"}
2
+
3
+ {"epoch": [1], "step": 1565, "train_loss": 0.7423, "val_loss": 0.649, "val_loss2": 0.6486, "_timestamp": 1742208013, "_runtime": 17264}
4
+
5
+ {"epoch": [2], "step": 3130, "train_loss": 0.7298, "val_loss": 0.6445, "val_loss2": 0.6449, "_timestamp": 1742232058, "_runtime": 41309}
6
+
7
+ {"epoch": [3], "step": 4695, "train_loss": 0.7266, "val_loss": 0.6424, "val_loss2": 0.6425, "_timestamp": 1742256099, "_runtime": 65350}
8
+
9
+ {"epoch": [4], "step": 6260, "train_loss": 0.7238, "val_loss": 0.6414, "val_loss2": 0.6413, "_timestamp": 1742280163, "_runtime": 89414}
10
+
11
+ {"epoch": [5], "step": 7825, "train_loss": 0.7235, "val_loss": 0.6395, "val_loss2": 0.6395, "_timestamp": 1742304443, "_runtime": 113694}
12
+
13
+ {"epoch": [6], "step": 9390, "train_loss": 0.7218, "val_loss": 0.6383, "val_loss2": 0.6387, "_timestamp": 1742328903, "_runtime": 138154}
14
+
15
+ {"epoch": [7], "step": 10955, "train_loss": 0.7213, "val_loss": 0.638, "val_loss2": 0.6379, "_timestamp": 1742353412, "_runtime": 162663}
16
+
17
+ {"epoch": [8], "step": 12520, "train_loss": 0.7215, "val_loss": 0.6373, "val_loss2": 0.6374, "_timestamp": 1742377946, "_runtime": 187197}
18
+
19
+ {"epoch": [9], "step": 14085, "train_loss": 0.7212, "val_loss": 0.6371, "val_loss2": 0.6369, "_timestamp": 1742402470, "_runtime": 211721}
20
+
21
+ {"epoch": [10], "step": 15650, "train_loss": 0.7194, "val_loss": 0.6362, "val_loss2": 0.6362, "_timestamp": 1742426977, "_runtime": 236228}
22
+
23
+ {"epoch": [11], "step": 17215, "train_loss": 0.7189, "val_loss": 0.6361, "val_loss2": 0.6358, "_timestamp": 1742451410, "_runtime": 260661}
24
+
Hindustani-70M-CNN/best_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05cee93fcd9d1d35c353446eabf2d1d32fcfb7e15baf7b52d5440433f04fe66b
3
+ size 237758893
Hindustani-70M-CNN/epoch_10_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd3a95d457afdd79112308f02d59e8510379b8875b08a1470d2c93c0ca66a70a
3
+ size 237759237
Hindustani-70M-CNN/summary.jsonl ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_file": "data/metadata_train_hindustani.json", "validation_file": "data/metadata_test_hindustani.json", "validation_file2": "data/metadata_test_hindustani.json", "test_file": "data/metadata_test_hindustani.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": true, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 4, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1743671054", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"}
2
+
3
+ {"epoch": [1], "step": 1565, "train_loss": 0.7311, "val_loss": 0.6419, "val_loss2": 0.6417, "_timestamp": 1743682272, "_runtime": 11217}
4
+
5
+ {"epoch": [2], "step": 3130, "train_loss": 0.7245, "val_loss": 0.6383, "val_loss2": 0.6388, "_timestamp": 1743699666, "_runtime": 28611}
6
+
7
+ {"epoch": [3], "step": 4695, "train_loss": 0.722, "val_loss": 0.6371, "val_loss2": 0.6371, "_timestamp": 1743716290, "_runtime": 45235}
8
+
9
+ {"epoch": [4], "step": 6260, "train_loss": 0.7198, "val_loss": 0.6357, "val_loss2": 0.6353, "_timestamp": 1743732866, "_runtime": 61811}
10
+
11
+ {"epoch": [5], "step": 7825, "train_loss": 0.719, "val_loss": 0.6343, "val_loss2": 0.6342, "_timestamp": 1743749482, "_runtime": 78427}
12
+
13
+ {"epoch": [6], "step": 9390, "train_loss": 0.7168, "val_loss": 0.6329, "val_loss2": 0.633, "_timestamp": 1743767092, "_runtime": 96037}
14
+
15
+ {"epoch": [7], "step": 10955, "train_loss": 0.7165, "val_loss": 0.6326, "val_loss2": 0.6325, "_timestamp": 1743784991, "_runtime": 113936}
16
+
17
+ {"epoch": [8], "step": 12520, "train_loss": 0.7163, "val_loss": 0.6317, "val_loss2": 0.6319, "_timestamp": 1743803174, "_runtime": 132119}
18
+
19
+ {"epoch": [9], "step": 14085, "train_loss": 0.7153, "val_loss": 0.6319, "val_loss2": 0.6317, "_timestamp": 1743820863, "_runtime": 149808}
20
+
21
+ {"epoch": [10], "step": 15650, "train_loss": 0.7144, "val_loss": 0.631, "val_loss2": 0.6311, "_timestamp": 1743838467, "_runtime": 167412}
22
+
23
+ {"epoch": [11], "step": 17215, "train_loss": 0.7139, "val_loss": 0.6307, "val_loss2": 0.6303, "_timestamp": 1743856327, "_runtime": 185272}
24
+
25
+ {"epoch": [12], "step": 18780, "train_loss": 0.7144, "val_loss": 0.6299, "val_loss2": 0.6301, "_timestamp": 1743874113, "_runtime": 203058}
26
+
Hindustani-70M-Transformer/best_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a32af19c15598b6447aff8c61befd6c4c6d86dcabd016e0202e71161caf76e47
3
+ size 225705984
Hindustani-70M-Transformer/epoch_10_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:315b320c0118a5f3c1ac596cefc50c9e197f6eade205dc499f71fd3c7d1556fc
3
+ size 224657408
Hindustani-70M-Transformer/summary.jsonl ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_file": "data/metadata_train_hindustani.json", "validation_file": "data/metadata_test_hindustani.json", "validation_file2": "data/metadata_test_hindustani.json", "test_file": "data/metadata_test_hindustani.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": true, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 4, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1741776778", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"}
2
+
3
+ {"epoch": [1], "step": 1565, "train_loss": 0.7426, "val_loss": 0.6411, "val_loss2": 0.641}
4
+
5
+ {"epoch": [2], "step": 3130, "train_loss": 0.7254, "val_loss": 0.6354, "val_loss2": 0.6355}
6
+
7
+ {"epoch": [3], "step": 4695, "train_loss": 0.7216, "val_loss": 0.6326, "val_loss2": 0.6326}
8
+
9
+ {"epoch": [4], "step": 6260, "train_loss": 0.7189, "val_loss": 0.6312, "val_loss2": 0.631}
10
+
11
+ {"epoch": [5], "step": 7825, "train_loss": 0.7186, "val_loss": 0.6297, "val_loss2": 0.6298}
12
+
13
+ {"epoch": [6], "step": 9390, "train_loss": 0.7166, "val_loss": 0.6285, "val_loss2": 0.6287}
14
+
15
+ {"epoch": [7], "step": 10955, "train_loss": 0.7162, "val_loss": 0.6279, "val_loss2": 0.6279}
16
+
17
+ {"epoch": [8], "step": 12520, "train_loss": 0.716, "val_loss": 0.6273, "val_loss2": 0.6273}
18
+
19
+ {"epoch": [9], "step": 14085, "train_loss": 0.7148, "val_loss": 0.6273, "val_loss2": 0.6272}
20
+
21
+ {"epoch": [10], "step": 15650, "train_loss": 0.7137, "val_loss": 0.6262, "val_loss2": 0.6262}
22
+
23
+ {"epoch": [11], "step": 17215, "train_loss": 0.714, "val_loss": 0.6258, "val_loss2": 0.6257}
24
+
25
+ {"epoch": [12], "step": 18780, "train_loss": 0.7138, "val_loss": 0.6252, "val_loss2": 0.6254}
26
+
Hindustani-8M-CNN/best_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2edffcfa5c98d06e1e12952344db9a5390dd7b4e172e50b7dc42fae819c0ded5
3
+ size 30130069
Hindustani-8M-CNN/epoch_10_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1328aa3d5ae31ff71b1b730d1abd991a89539bec70979ac7184309d270906455
3
+ size 30130917
Hindustani-8M-CNN/summary.jsonl ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_file": "data/metadata_train_hindustani.json", "validation_file": "data/metadata_test_hindustani.json", "validation_file2": "data/metadata_test_hindustani.json", "test_file": "data/metadata_test_hindustani.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": true, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 4, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1742452722", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"}
2
+
3
+ {"epoch": [1], "step": 1565, "train_loss": 0.7361, "val_loss": 0.6453, "val_loss2": 0.6451, "_timestamp": 1742463550, "_runtime": 10827}
4
+
5
+ {"epoch": [2], "step": 3130, "train_loss": 0.7277, "val_loss": 0.6425, "val_loss2": 0.643, "_timestamp": 1742481762, "_runtime": 29039}
6
+
7
+ {"epoch": [3], "step": 4695, "train_loss": 0.7257, "val_loss": 0.6413, "val_loss2": 0.6413, "_timestamp": 1742500242, "_runtime": 47519}
8
+
9
+ {"epoch": [4], "step": 6260, "train_loss": 0.7237, "val_loss": 0.6405, "val_loss2": 0.6402, "_timestamp": 1742518376, "_runtime": 65653}
10
+
11
+ {"epoch": [5], "step": 7825, "train_loss": 0.724, "val_loss": 0.6395, "val_loss2": 0.6394, "_timestamp": 1742536089, "_runtime": 83366}
12
+
13
+ {"epoch": [6], "step": 9390, "train_loss": 0.7224, "val_loss": 0.6391, "val_loss2": 0.639, "_timestamp": 1742553681, "_runtime": 100958}
14
+
15
+ {"epoch": [7], "step": 10955, "train_loss": 0.7225, "val_loss": 0.6384, "val_loss2": 0.6382, "_timestamp": 1742571932, "_runtime": 119209}
16
+
17
+ {"epoch": [8], "step": 12520, "train_loss": 0.7227, "val_loss": 0.6381, "val_loss2": 0.6383, "_timestamp": 1742590268, "_runtime": 137545}
18
+
19
+ {"epoch": [9], "step": 14085, "train_loss": 0.7223, "val_loss": 0.6375, "val_loss2": 0.6378, "_timestamp": 1742607987, "_runtime": 155264}
20
+
21
+ {"epoch": [10], "step": 15650, "train_loss": 0.7207, "val_loss": 0.6369, "val_loss2": 0.6369, "_timestamp": 1742625742, "_runtime": 173019}
22
+
23
+ {"epoch": [11], "step": 17215, "train_loss": 0.7204, "val_loss": 0.6371, "val_loss2": 0.6369, "_timestamp": 1742643602, "_runtime": 190879}
24
+
25
+ {"epoch": [12], "step": 18780, "train_loss": 0.7207, "val_loss": 0.6364, "val_loss2": 0.6367, "_timestamp": 1742661425, "_runtime": 208702}
26
+
27
+ {"epoch": [13], "step": 20345, "train_loss": 0.72, "val_loss": 0.6367, "val_loss2": 0.6367, "_timestamp": 1742678826, "_runtime": 226103}
28
+
29
+ {"epoch": [14], "step": 21910, "train_loss": 0.7192, "val_loss": 0.636, "val_loss2": 0.6358, "_timestamp": 1742696237, "_runtime": 243514}
30
+
31
+ {"epoch": [15], "step": 23475, "train_loss": 0.72, "val_loss": 0.6361, "val_loss2": 0.6358, "_timestamp": 1742713587, "_runtime": 260864}
32
+
Hindustani-8M-Transformer/best_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:542e668ca30268ff9c0005fe16de2b880c26c2b9ce46aacfaf40aa88bbdd017a
3
+ size 36565318
Hindustani-8M-Transformer/epoch_10_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62152d880c5d883214a6429b6ae3c46d15f2bcbeffdf4808665f1f0ffd90a11f
3
+ size 36566034
Hindustani-8M-Transformer/epoch_20_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13092ebbcfe7261fb5bf9e9533bef0f1008b9d8dbe63c298be983bdcebc47c03
3
+ size 36566034
Hindustani-8M-Transformer/epoch_30_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c89cc516c7aa346f8f7eed31fe46ac24063e0b627785a76e6ae7a07691d325a5
3
+ size 36566034
Hindustani-8M-Transformer/summary.jsonl ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_file": "data/metadata_train_hindustani.json", "validation_file": "data/metadata_test_hindustani.json", "validation_file2": "data/metadata_test_hindustani.json", "test_file": "data/metadata_test_hindustani.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": true, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 4, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1741086279", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"}
2
+
3
+ {"epoch": [1], "step": 1565, "train_loss": 0.737, "val_loss": 0.6425, "val_loss2": 0.6423, "_timestamp": 1741093953, "_runtime": 7673}
4
+
5
+ {"epoch": [2], "step": 3130, "train_loss": 0.7272, "val_loss": 0.6386, "val_loss2": 0.6388, "_timestamp": 1741105095, "_runtime": 18815}
6
+
7
+ {"epoch": [3], "step": 4695, "train_loss": 0.7246, "val_loss": 0.6368, "val_loss2": 0.6366, "_timestamp": 1741116458, "_runtime": 30178}
8
+
9
+ {"epoch": [4], "step": 6260, "train_loss": 0.7219, "val_loss": 0.6357, "val_loss2": 0.6355, "_timestamp": 1741127983, "_runtime": 41703}
10
+
11
+ {"epoch": [5], "step": 7825, "train_loss": 0.7217, "val_loss": 0.6346, "val_loss2": 0.6347, "_timestamp": 1741139501, "_runtime": 53221}
12
+
13
+ {"epoch": [6], "step": 9390, "train_loss": 0.7211, "val_loss": 0.6338, "val_loss2": 0.6339, "_timestamp": 1741151021, "_runtime": 64741}
14
+
15
+ {"epoch": [7], "step": 10955, "train_loss": 0.7196, "val_loss": 0.6333, "val_loss2": 0.6333, "_timestamp": 1741163406, "_runtime": 77126}
16
+
17
+ {"epoch": [8], "step": 12520, "train_loss": 0.7205, "val_loss": 0.6328, "val_loss2": 0.6328, "_timestamp": 1741174640, "_runtime": 88360}
18
+
19
+ {"epoch": [9], "step": 14085, "train_loss": 0.7196, "val_loss": 0.6326, "val_loss2": 0.6326, "_timestamp": 1741186128, "_runtime": 99848}
20
+
21
+ {"epoch": [10], "step": 15650, "train_loss": 0.7183, "val_loss": 0.6321, "val_loss2": 0.6321, "_timestamp": 1741197738, "_runtime": 111458}
22
+
23
+ {"epoch": [11], "step": 17215, "train_loss": 0.7183, "val_loss": 0.6319, "val_loss2": 0.6317, "_timestamp": 1741209314, "_runtime": 123034}
24
+
25
+ {"epoch": [12], "step": 18780, "train_loss": 0.7189, "val_loss": 0.6313, "val_loss2": 0.6315, "_timestamp": 1741220875, "_runtime": 134595}
26
+
27
+ {"epoch": [13], "step": 20345, "train_loss": 0.7184, "val_loss": 0.6316, "val_loss2": 0.6314, "_timestamp": 1741232412, "_runtime": 146132}
28
+
29
+ {"epoch": [14], "step": 21910, "train_loss": 0.7175, "val_loss": 0.6312, "val_loss2": 0.6309, "_timestamp": 1741243935, "_runtime": 157655}
30
+
31
+ {"epoch": [15], "step": 23475, "train_loss": 0.7182, "val_loss": 0.6309, "val_loss2": 0.6306, "_timestamp": 1741255173, "_runtime": 168893}
32
+
33
+ {"epoch": [16], "step": 25040, "train_loss": 0.7178, "val_loss": 0.6305, "val_loss2": 0.6306, "_timestamp": 1741266408, "_runtime": 180128}
34
+
35
+ {"epoch": [17], "step": 26605, "train_loss": 0.7172, "val_loss": 0.6304, "val_loss2": 0.6305, "_timestamp": 1741277843, "_runtime": 191563}
36
+
37
+ {"epoch": [18], "step": 28170, "train_loss": 0.7168, "val_loss": 0.63, "val_loss2": 0.6302, "_timestamp": 1741289372, "_runtime": 203092}
38
+
39
+ {"epoch": [19], "step": 29735, "train_loss": 0.7151, "val_loss": 0.6301, "val_loss2": 0.63, "_timestamp": 1741300958, "_runtime": 214678}
40
+
41
+ {"epoch": [20], "step": 31300, "train_loss": 0.7164, "val_loss": 0.6296, "val_loss2": 0.6298, "_timestamp": 1741312540, "_runtime": 226260}
42
+
43
+ {"epoch": [21], "step": 32865, "train_loss": 0.7183, "val_loss": 0.6298, "val_loss2": 0.6298, "_timestamp": 1741324094, "_runtime": 237814}
44
+
45
+ {"epoch": [22], "step": 34430, "train_loss": 0.7142, "val_loss": 0.6297, "val_loss2": 0.6296, "_timestamp": 1741335262, "_runtime": 248982}
46
+
47
+ {"epoch": [23], "step": 35995, "train_loss": 0.7173, "val_loss": 0.6294, "val_loss2": 0.6293, "_timestamp": 1741346585, "_runtime": 260305}
48
+
49
+ {"epoch": [24], "step": 37560, "train_loss": 0.7155, "val_loss": 0.6293, "val_loss2": 0.6292, "_timestamp": 1741358197, "_runtime": 271917}
50
+
51
+ {"epoch": [25], "step": 39125, "train_loss": 0.715, "val_loss": 0.6293, "val_loss2": 0.6294, "_timestamp": 1741369927, "_runtime": 283647}
52
+
53
+ {"epoch": [26], "step": 40690, "train_loss": 0.7161, "val_loss": 0.6289, "val_loss2": 0.6292, "_timestamp": 1741381523, "_runtime": 295243}
54
+
55
+ {"epoch": [27], "step": 42255, "train_loss": 0.7151, "val_loss": 0.6289, "val_loss2": 0.629, "_timestamp": 1741393101, "_runtime": 306821}
56
+
57
+ {"epoch": [28], "step": 43820, "train_loss": 0.716, "val_loss": 0.6287, "val_loss2": 0.6287, "_timestamp": 1741405033, "_runtime": 318753}
58
+
59
+ {"epoch": [29], "step": 45385, "train_loss": 0.7151, "val_loss": 0.6287, "val_loss2": 0.6289, "_timestamp": 1741416612, "_runtime": 330332}
60
+
61
+ {"epoch": [30], "step": 46950, "train_loss": 0.7158, "val_loss": 0.6289, "val_loss2": 0.6286, "_timestamp": 1741428200, "_runtime": 341920}
62
+
63
+ {"epoch": [31], "step": 48515, "train_loss": 0.7143, "val_loss": 0.6283, "val_loss2": 0.6287, "_timestamp": 1741440185, "_runtime": 353905}
64
+
65
+ {"epoch": [32], "step": 50080, "train_loss": 0.7147, "val_loss": 0.6285, "val_loss2": 0.6285, "_timestamp": 1741452877, "_runtime": 366597}
66
+
67
+ {"epoch": [33], "step": 51645, "train_loss": 0.7141, "val_loss": 0.6286, "val_loss2": 0.6284, "_timestamp": 1741464341, "_runtime": 378061}
68
+
69
+ {"epoch": [34], "step": 53210, "train_loss": 0.7144, "val_loss": 0.6282, "val_loss2": 0.6283, "_timestamp": 1741476021, "_runtime": 389741}
70
+
71
+ {"epoch": [35], "step": 54775, "train_loss": 0.7142, "val_loss": 0.6284, "val_loss2": 0.6281, "_timestamp": 1741488730, "_runtime": 402450}
72
+
73
+ {"epoch": [36], "step": 56340, "train_loss": 0.7135, "val_loss": 0.628, "val_loss2": 0.628, "_timestamp": 1741501554, "_runtime": 415274}
74
+
75
+ {"epoch": [37], "step": 57905, "train_loss": 0.7126, "val_loss": 0.6281, "val_loss2": 0.6282, "_timestamp": 1741514134, "_runtime": 427854}
76
+
Makam-20M-CNN/best_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:589da21dc6da733e8eb4d386684429eee3aae4a2adf90164c13709493fcb83ed
3
+ size 89348293
Makam-20M-CNN/epoch_10_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b145760b79ed6a52b82da384721b29542556a594401c7c5ccc7d8bfb237dfde7
3
+ size 89348805
Makam-20M-CNN/summary.jsonl ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_file": "data/metadata_train_makam.json", "validation_file": "data/metadata_val_makam.json", "validation_file2": "data/metadata_test_makam.json", "test_file": "data/metadata_test_makam.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": false, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 2, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1743793947", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"}
2
+
3
+ {"epoch": 1, "step": 2356, "train_loss": 0.7477, "val_loss": 0.6585, "val_loss2": 0.657}
4
+
5
+ {"epoch": 2, "step": 4712, "train_loss": 0.7414, "val_loss": 0.6555, "val_loss2": 0.6554}
6
+
7
+ {"epoch": 3, "step": 7068, "train_loss": 0.7397, "val_loss": 0.6555, "val_loss2": 0.6541}
8
+
9
+ {"epoch": 4, "step": 9424, "train_loss": 0.7382, "val_loss": 0.6536, "val_loss2": 0.6532}
10
+
11
+ {"epoch": 5, "step": 11780, "train_loss": 0.7384, "val_loss": 0.6538, "val_loss2": 0.6533}
12
+
13
+ {"epoch": 6, "step": 14136, "train_loss": 0.7358, "val_loss": 0.6533, "val_loss2": 0.6524}
14
+
15
+ {"epoch": 7, "step": 16492, "train_loss": 0.738, "val_loss": 0.6523, "val_loss2": 0.6522}
16
+
17
+ {"epoch": 8, "step": 18848, "train_loss": 0.7383, "val_loss": 0.6533, "val_loss2": 0.6516}
18
+
19
+ {"epoch": 9, "step": 21204, "train_loss": 0.7368, "val_loss": 0.6519, "val_loss2": 0.6515}
20
+
21
+ {"epoch": 10, "step": 23560, "train_loss": 0.734, "val_loss": 0.6517, "val_loss2": 0.6511}
22
+
23
+ {"epoch": 11, "step": 25916, "train_loss": 0.7363, "val_loss": 0.652, "val_loss2": 0.6507}
24
+
25
+ {"epoch": 12, "step": 28272, "train_loss": 0.734, "val_loss": 0.6503, "val_loss2": 0.6502}
26
+
Makam-2M-CNN/best_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b24adf7ac33e9178f7b82dc1add0f16f43a3acdfa5fdb8a661518865b66f6fc
3
+ size 6025775
Makam-2M-CNN/epoch_10_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5641481689053fefdd6d4e9c9d9147e30ac323e3f6f758126f3a3cb0cdd8147e
3
+ size 6025951
Makam-2M-CNN/summary.jsonl ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_file": "data/metadata_train_makam.json", "validation_file": "data/metadata_val_makam.json", "validation_file2": "data/metadata_test_makam.json", "test_file": "data/metadata_test_makam.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": false, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 2, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1744091442", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"}
2
+
3
+ {"epoch": 1, "step": 2356, "train_loss": 0.7514, "val_loss": 0.6619, "val_loss2": 0.6603}
4
+
5
+ {"epoch": 2, "step": 4712, "train_loss": 0.7445, "val_loss": 0.6591, "val_loss2": 0.659}
6
+
7
+ {"epoch": 3, "step": 7068, "train_loss": 0.7422, "val_loss": 0.6582, "val_loss2": 0.6578}
8
+
9
+ {"epoch": 4, "step": 9424, "train_loss": 0.7414, "val_loss": 0.6579, "val_loss2": 0.6574}
10
+
11
+ {"epoch": 5, "step": 11780, "train_loss": 0.7421, "val_loss": 0.658, "val_loss2": 0.657}
12
+
13
+ {"epoch": 6, "step": 14136, "train_loss": 0.7414, "val_loss": 0.657, "val_loss2": 0.6564}
14
+
15
+ {"epoch": 7, "step": 16492, "train_loss": 0.7419, "val_loss": 0.6562, "val_loss2": 0.6563}
16
+
17
+ {"epoch": 8, "step": 18848, "train_loss": 0.742, "val_loss": 0.6557, "val_loss2": 0.656}
18
+
19
+ {"epoch": 9, "step": 21204, "train_loss": 0.7416, "val_loss": 0.6566, "val_loss2": 0.6558}
20
+
21
+ {"epoch": 10, "step": 23560, "train_loss": 0.739, "val_loss": 0.6556, "val_loss2": 0.6556}
22
+
23
+ {"epoch": 11, "step": 25916, "train_loss": 0.7415, "val_loss": 0.6557, "val_loss2": 0.6553}
24
+
25
+ {"epoch": 12, "step": 28272, "train_loss": 0.7391, "val_loss": 0.6554, "val_loss2": 0.6549}
26
+
Makam-40M-CNN/best_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ccb7b736a623fa20ec16be2ad8c52366336f26aa59089609cc19004adad4325
3
+ size 178445189
Makam-40M-CNN/epoch_10_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6726e1df1e7d575cba093b05e7442423e5e6c62721fa04796fbe1a6e43c8990a
3
+ size 178445701
Makam-40M-CNN/summary.jsonl ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_file": "data/metadata_train_makam.json", "validation_file": "data/metadata_val_makam.json", "validation_file2": "data/metadata_test_makam.json", "test_file": "data/metadata_test_makam.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": false, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 2, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1743635059", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"}
2
+
3
+ {"epoch": 1, "step": 2356, "train_loss": 0.7469, "val_loss": 0.6584, "val_loss2": 0.6558}
4
+
5
+ {"epoch": 2, "step": 4712, "train_loss": 0.7405, "val_loss": 0.6542, "val_loss2": 0.6542}
6
+
7
+ {"epoch": 3, "step": 7068, "train_loss": 0.7387, "val_loss": 0.6529, "val_loss2": 0.6528}
8
+
9
+ {"epoch": 4, "step": 9424, "train_loss": 0.7368, "val_loss": 0.6534, "val_loss2": 0.6522}
10
+
11
+ {"epoch": 5, "step": 11780, "train_loss": 0.7369, "val_loss": 0.6515, "val_loss2": 0.6518}
12
+
13
+ {"epoch": 6, "step": 14136, "train_loss": 0.736, "val_loss": 0.6507, "val_loss2": 0.6511}
14
+
15
+ {"epoch": 7, "step": 16492, "train_loss": 0.7362, "val_loss": 0.6521, "val_loss2": 0.6509}
16
+
17
+ {"epoch": 8, "step": 18848, "train_loss": 0.7364, "val_loss": 0.6498, "val_loss2": 0.6505}
18
+
19
+ {"epoch": 9, "step": 21204, "train_loss": 0.7355, "val_loss": 0.6505, "val_loss2": 0.6502}
20
+
21
+ {"epoch": 10, "step": 23560, "train_loss": 0.7335, "val_loss": 0.6499, "val_loss2": 0.6497}
22
+
23
+ {"epoch": 11, "step": 25916, "train_loss": 0.7342, "val_loss": 0.649, "val_loss2": 0.6493}
24
+
25
+ {"epoch": 12, "step": 28272, "train_loss": 0.7328, "val_loss": 0.6488, "val_loss2": 0.6491}
26
+
27
+ {"epoch": 13, "step": 30628, "train_loss": 0.7329, "val_loss": 0.6488, "val_loss2": 0.649}
28
+
Makam-40M-Transformer/best_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b28940e3aa8bbaa5c89f16f590b24a07b1a6d5eb149c12103d528df20e917fa
3
+ size 146260733
Makam-40M-Transformer/epoch_10_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0d6e85d3826cb11c10c5d7d5b4c1ec87fa1e4b313a37664cff941bf0153480e
3
+ size 146263541
Makam-40M-Transformer/summary.jsonl ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_file": "data/metadata_train_makam.json", "validation_file": "data/metadata_val_makam.json", "validation_file2": "data/metadata_test_makam.json", "test_file": "data/metadata_test_makam.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": false, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 2, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1743409928", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"}
2
+
3
+ {"epoch": 1, "step": 2356, "train_loss": 0.7545, "val_loss": 0.661, "val_loss2": 0.6601}
4
+
5
+ {"epoch": 2, "step": 4712, "train_loss": 0.7435, "val_loss": 0.6579, "val_loss2": 0.6572}
6
+
7
+ {"epoch": 3, "step": 7068, "train_loss": 0.7413, "val_loss": 0.6568, "val_loss2": 0.6559}
8
+
9
+ {"epoch": 4, "step": 9424, "train_loss": 0.7389, "val_loss": 0.655, "val_loss2": 0.6548}
10
+
11
+ {"epoch": 5, "step": 11780, "train_loss": 0.7394, "val_loss": 0.6542, "val_loss2": 0.6537}
12
+
13
+ {"epoch": 6, "step": 14136, "train_loss": 0.7377, "val_loss": 0.6541, "val_loss2": 0.6532}
14
+
15
+ {"epoch": 7, "step": 16492, "train_loss": 0.7388, "val_loss": 0.6526, "val_loss2": 0.6528}
16
+
17
+ {"epoch": 8, "step": 18848, "train_loss": 0.7391, "val_loss": 0.6531, "val_loss2": 0.6525}
18
+
19
+ {"epoch": 9, "step": 21204, "train_loss": 0.7373, "val_loss": 0.6523, "val_loss2": 0.6522}
20
+
21
+ {"epoch": 10, "step": 23560, "train_loss": 0.7355, "val_loss": 0.6517, "val_loss2": 0.6518}
22
+
23
+ {"epoch": 11, "step": 25916, "train_loss": 0.7366, "val_loss": 0.6516, "val_loss2": 0.6517}
24
+
Makam-70M-CNN/best_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c46e98bb13cced40257758f1aa2a734849eff7b693bb6d2c8ab7954b1293b73
3
+ size 237758893
Makam-70M-CNN/epoch_10_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ccfa3d5fc162e769f0a329d7514e22612410054c99c4d8d1785f15d0d91932f
3
+ size 237759237
Makam-70M-CNN/summary.jsonl ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_file": "data/metadata_train_makam.json", "validation_file": "data/metadata_val_makam.json", "validation_file2": "data/metadata_test_makam.json", "test_file": "data/metadata_test_makam.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": true, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 2, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1742539331", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"}
2
+
3
+ {"epoch": [1], "step": 2356, "train_loss": 0.7458, "val_loss": 0.6554, "val_loss2": 0.655}
4
+
5
+ {"epoch": [2], "step": 4712, "train_loss": 0.7402, "val_loss": 0.6537, "val_loss2": 0.6532}
6
+
7
+ {"epoch": [3], "step": 7068, "train_loss": 0.7373, "val_loss": 0.652, "val_loss2": 0.652}
8
+
9
+ {"epoch": [4], "step": 9424, "train_loss": 0.7362, "val_loss": 0.6519, "val_loss2": 0.6512}
10
+
11
+ {"epoch": [5], "step": 11780, "train_loss": 0.7357, "val_loss": 0.6516, "val_loss2": 0.6504}
12
+
13
+ {"epoch": [6], "step": 14136, "train_loss": 0.7348, "val_loss": 0.6512, "val_loss2": 0.6502}
14
+
15
+ {"epoch": [7], "step": 16492, "train_loss": 0.7347, "val_loss": 0.6502, "val_loss2": 0.6496}
16
+
17
+ {"epoch": [8], "step": 18848, "train_loss": 0.7351, "val_loss": 0.6483, "val_loss2": 0.649}
18
+
19
+ {"epoch": [9], "step": 21204, "train_loss": 0.734, "val_loss": 0.6492, "val_loss2": 0.6488}
20
+
21
+ {"epoch": [10], "step": 23560, "train_loss": 0.7314, "val_loss": 0.6481, "val_loss2": 0.6482}
22
+
23
+ {"epoch": [11], "step": 25916, "train_loss": 0.7328, "val_loss": 0.6477, "val_loss2": 0.6482}
24
+
25
+ {"epoch": [12], "step": 28272, "train_loss": 0.731, "val_loss": 0.6473, "val_loss2": 0.6475}
26
+
27
+ {"epoch": [13], "step": 30628, "train_loss": 0.7315, "val_loss": 0.648, "val_loss2": 0.6476}
28
+
29
+ {"epoch": [14], "step": 32984, "train_loss": 0.7321, "val_loss": 0.6491, "val_loss2": 0.6473}
30
+
31
+ {"epoch": [15], "step": 35340, "train_loss": 0.7308, "val_loss": 0.6472, "val_loss2": 0.6471}
32
+
Makam-70M-Transformer/best_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08eeaab800a1511a2ce78db933b6cdd805cfb6c57fb8efd47561bbcf49514f46
3
+ size 292523194
Makam-70M-Transformer/epoch_10_adapters.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8695a028a169066c9db0c8427aa1cc177306da4f6c1568a5a3b639ff36583c1
3
+ size 292528810
Makam-70M-Transformer/summary.jsonl ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_file": "data/metadata_train_makam.json", "validation_file": "data/metadata_val_makam.json", "validation_file2": "data/metadata_test_makam.json", "test_file": "data/metadata_test_makam.json", "num_examples": -1, "text_encoder_name": "google/flan-t5-large", "scheduler_name": "stabilityai/stable-diffusion-2-1", "unet_model_name": null, "unet_model_config": "configs/diffusion_model_config_munet.json", "hf_model": null, "snr_gamma": 5.0, "freeze_text_encoder": true, "text_column": "main_caption", "text2_column": "alt_caption", "audio_column": "location", "beats_column": "beats", "chords_column": "chords", "chords_time_column": "chords_time", "uncondition": false, "uncondition_all": true, "uncondition_single": true, "drop_sentences": true, "random_pick_text_column": true, "model_type": "Mustango", "prefix": null, "per_device_train_batch_size": 2, "per_device_eval_batch_size": 4, "learning_rate": 4.5e-05, "weight_decay": 1e-08, "num_train_epochs": 100, "max_train_steps": null, "gradient_accumulation_steps": 4, "lr_scheduler_type": "linear", "num_warmup_steps": 0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.0001, "adam_epsilon": 1e-08, "output_dir": "saved/1741802495", "seed": 1234, "checkpointing_steps": "best", "save_every": 10, "resume_from_checkpoint": null, "with_tracking": false, "report_to": "all"}
2
+
3
+ {"epoch": [1], "step": 2356, "train_loss": 0.7555, "val_loss": 0.6576, "val_loss2": 0.6576, "_timestamp": 1741815555, "_runtime": 13060}
4
+
5
+ {"epoch": [2], "step": 4712, "train_loss": 0.7415, "val_loss": 0.6543, "val_loss2": 0.6547, "_timestamp": 1741828919, "_runtime": 26424}
6
+
7
+ {"epoch": [3], "step": 7068, "train_loss": 0.739, "val_loss": 0.6531, "val_loss2": 0.6528, "_timestamp": 1741844156, "_runtime": 41661}
8
+
9
+ {"epoch": [4], "step": 9424, "train_loss": 0.737, "val_loss": 0.6517, "val_loss2": 0.6522, "_timestamp": 1741859740, "_runtime": 57245}
10
+
11
+ {"epoch": [5], "step": 11780, "train_loss": 0.7377, "val_loss": 0.6509, "val_loss2": 0.651, "_timestamp": 1741880157, "_runtime": 77662}
12
+
13
+ {"epoch": [6], "step": 14136, "train_loss": 0.7356, "val_loss": 0.6504, "val_loss2": 0.6506, "_timestamp": 1741896167, "_runtime": 93672}
14
+
15
+ {"epoch": [7], "step": 16492, "train_loss": 0.7361, "val_loss": 0.6494, "val_loss2": 0.6499, "_timestamp": 1741911385, "_runtime": 108890}
16
+
17
+ {"epoch": [8], "step": 18848, "train_loss": 0.736, "val_loss": 0.6494, "val_loss2": 0.6496, "_timestamp": 1741925596, "_runtime": 123101}
18
+
19
+ {"epoch": [9], "step": 21204, "train_loss": 0.735, "val_loss": 0.6482, "val_loss2": 0.6488, "_timestamp": 1741939137, "_runtime": 136642}
20
+
21
+ {"epoch": [10], "step": 23560, "train_loss": 0.7326, "val_loss": 0.6482, "val_loss2": 0.6488, "_timestamp": 1741958327, "_runtime": 155832}
22
+
23
+ {"epoch": [11], "step": 25916, "train_loss": 0.7342, "val_loss": 0.6477, "val_loss2": 0.6482, "_timestamp": 1741980523, "_runtime": 178028}
24
+
25
+ {"epoch": [12], "step": 28272, "train_loss": 0.7324, "val_loss": 0.6474, "val_loss2": 0.6478, "_timestamp": 1741995386, "_runtime": 192891}
26
+
27
+ {"epoch": [13], "step": 30628, "train_loss": 0.7319, "val_loss": 0.648, "val_loss2": 0.648, "_timestamp": 1742011261, "_runtime": 208766}
28
+