{
  "model_type": "musicgen",
  "text_encoder": {
    "model_type": "t5",
    "name_or_path": "t5-base",
    "type": "T5EncoderModel",
    "config": {
      "vocab_size": 32128,
      "d_model": 1024,
      "num_layers": 12,
      "num_heads": 16,
      "dropout_rate": 0.1
    }
  },
  "audio_encoder": {
    "model_type": "wav2vec2",
    "name_or_path": "facebook/wav2vec2-large",
    "type": "AudioEncoder",
    "config": {
      "sample_rate": 32000,
      "num_channels": 1,
      "embedding_size": 512
    }
  },
  "decoder": {
    "model_type": "transformer",
    "name_or_path": "facebook/musicgen-large",
    "type": "TransformerDecoder",
    "config": {
      "d_model": 1024,
      "num_heads": 16,
      "num_layers": 24,
      "dropout_rate": 0.1
    }
  },
  "training": {
    "batch_size": 16,
    "num_epochs": 100,
    "learning_rate": 0.0001,
    "weight_decay": 0.01,
    "gradient_clipping": 1.0
  },
  "generation": {
    "sample_rate": 32000,
    "audio_format": "wav",
    "num_samples": 5,
    "max_duration": 30.0,
    "temperature": 1.0,
    "top_k": 250,
    "top_p": 0.9
  },
  "logging": {
    "log_tensorboard": true,
    "log_wandb": true,
    "wandb_project": "music_generation",
    "log_updates": 10
  },
  "hardware": {
    "device": "cuda",
    "num_gpus": 4
  }
}