# Hyperparameter configuration (training, data filtering, model, inference).
# Keys are kept in alphabetical order at every nesting level.
# Settings not listed here presumably come from `base_config` — confirm
# against the config loader.

accumulate_grad_batches: 1
base_config: config/config_base.yaml
batch_max_tokens: 12000
batch_size: 2
cfg_init: 1.0
cfg_scale: 4.0
cfg_schedule: linear
check_val_every_n_epoch: 10
# NOTE(review): `clip_grad_norm` (0) and `gradient_clip_val` (1.0) both look
# like gradient-clipping settings — confirm which one the trainer reads.
clip_grad_norm: 0
data_dir: ''
debug: false
deep_speed_strategy_stage: 2
drop_last: true
dynamic_cfg: false
endless_ds: false

filter_args:
  lang:
    - zh
    - en
  max_spk_num: 6
  speech_ratio: 0.6

gradient_clip_val: 1.0
indexed_ds: true
infer: false
infer_exp_name: ''
infer_json_path: ''
inference_ckpt: ''
inference_mode: nonstreaming
# Written with an explicit mantissa dot: YAML 1.1 loaders (e.g. PyYAML)
# resolve a bare `1e-4` as the string '1e-4' rather than a float.
learning_rate: 1.0e-4
limit_val_batches: 100
load_opt: false
log_interval: 10
logger_type: tensorboard

loss:
  lambda_fm: 1.0
  lambda_phone: 0.0
  mel_loss: l1

max_epochs: 1000
max_eval_sentences: -1
max_eval_tokens: -1
max_prompt_ratio: 0.5
max_segment_cnt: 20000
max_sentences: -1
max_speech_duration: 20
max_tokens: 31250
max_training_steps: 100000
max_updates: 160000
mel_mean: -4.479605
mel_std: 3.4584913
meta_dir: null
min_prompt_duration: 0.5
min_speech_duration: -1

model:
  condition_prenet_depth: 6
  dit:
    chunk_params:
      hz: 50
      max_chunk: 3.0
      max_chunk_history: 50000000
      min_chunk: 0.5
      need_block_shift: false
    condition_input_dim: 1280
    condition_type: discrete_codes
    depth: 16
    # NOTE(review): `gleu_tanh` is possibly a misspelling of `gelu_tanh`;
    # value left as-is — confirm against the activation names the model
    # code accepts.
    ffn_act_layer: gleu_tanh
    ffn_conv_kernel_size: 5
    ffn_gated_glu: false
    ffn_type: vanilla_mlp
    hidden_size: 2304
    input_size: 80
    max_seq_len: 4096
    mlp_ratio: 4.0
    num_heads: 18
    position_embedding_type: skip
    prompt_cfg_dropout: 0.2
    rope_params:
      max_position_embeddings: 4096
      rope_base: 10000.0
      rope_interpolation_factor: 1.0
    # NOTE(review): the four keys below are assumed to belong to `dit`
    # (not `rope_params`) — confirm against the model code.
    semantic_cfg_dropout: 0.2
    semantic_vocab_size: 16384
    use_chunk_setting: true
    use_rope: true
  phone_predictor:
    blank_id: 4
    phone_vocab_size: 5000
  # NOTE(review): the four keys below are assumed to belong to `model`
  # (not `phone_predictor`) — confirm against the model code.
  position_id_start_from: 0
  random_position_start: true
  restart_position_ids: false
  use_condition_prenet: false

need_merge_same_speaker: true
need_precise_phones: false
# NOTE(review): key name looks like a typo of `no_overlap`; left unchanged
# because renaming it would need a matching change in whatever reads it.
no_verlap: true
normalize_mel: true
num_nodes: 1
num_sanity_val_steps: 0
num_workers: 1
ode_steps: 150
optimizer_adam_beta1: 0.9
optimizer_adam_beta2: 0.98
optimizer_class: adamw
pin_memory: true
precision: bf16-mixed
save_interval: 2000
save_topk: 10
seed: 1234
shuffle: true
sort_by_len: true
src_sample_rate: 16000
strategy: ddp
tensorboard_dir: tb_logs
test_num: 100
tgt_sample_rate: 24000
timescale: 80000
use_cfg: false
use_cfg_rescale: false
use_distributed_sampler: false
use_uncondition: false
val_check_interval: 2000000
vocoder_ckpt: ''
wandb_name: glm4_semantic_cfm_v2_debug
warmup_updates: 100
weight_decay: 0.0001