File size: 1,534 Bytes
9ff53eb
2905751
 
85bb8f8
2905751
c328672
2905751
 
 
 
 
 
 
 
 
85bb8f8
c328672
2905751
 
 
 
 
 
 
 
85bb8f8
c328672
2905751
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9ff53eb
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
{
    "model_type": "musicgen",
    "text_encoder": {
        "model_type": "t5",
        "name_or_path": "t5-base",
        "type": "T5EncoderModel",
        "config": {
            "vocab_size": 32128,
            "d_model": 768,
            "num_layers": 12,
            "num_heads": 12,
            "dropout_rate": 0.1
        }
    },
    "audio_encoder": {
        "model_type": "wav2vec2",
        "name_or_path": "facebook/wav2vec2-large",
        "type": "AudioEncoder",
        "config": {
            "sample_rate": 32000,
            "num_channels": 1,
            "embedding_size": 512
        }
    },
    "decoder": {
        "model_type": "transformer",
        "name_or_path": "facebook/musicgen-large",
        "type": "TransformerDecoder",
        "config": {
            "d_model": 2048,
            "num_heads": 32,
            "num_layers": 48,
            "dropout_rate": 0.1
        }
    },
    "training": {
        "batch_size": 16,
        "num_epochs": 100,
        "learning_rate": 0.0001,
        "weight_decay": 0.01,
        "gradient_clipping": 1.0
    },
    "generation": {
        "sample_rate": 32000,
        "audio_format": "wav",
        "num_samples": 5,
        "max_duration": 30.0,
        "temperature": 1.0,
        "top_k": 250,
        "top_p": 0.9
    },
    "logging": {
        "log_tensorboard": true,
        "log_wandb": true,
        "wandb_project": "music_generation",
        "log_updates": 10
    },
    "hardware": {
        "device": "cuda",
        "num_gpus": 4
    }
}