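# Page-header residue from the hosting site, kept as comments so the file parses:
#   author avatar: bigmoyan's picture
#   commit message: Upload folder using huggingface_hub
#   commit: c102e8c (verified)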
# Top-level run settings: flat scalar keys, kept in alphabetical order.
accumulate_grad_batches: 1
# Path to another config file; presumably the parent config this file
# overrides -- confirm how the loader resolves it.
base_config: config/config_base.yaml
batch_max_tokens: 12000
batch_size: 2
cfg_init: 1.0
cfg_scale: 4.0
cfg_schedule: linear
check_val_every_n_epoch: 10
# NOTE(review): `gradient_clip_val: 1.0` is also set in this file; confirm
# which of the two clipping keys the trainer actually reads.
clip_grad_norm: 0
# Explicit empty string (not null).
data_dir: ''
debug: false
# NOTE(review): `strategy` is `ddp` in this file, so this stage presumably
# only matters when a DeepSpeed strategy is selected -- confirm.
deep_speed_strategy_stage: 2
drop_last: true
dynamic_cfg: false
endless_ds: false
# `lang`, `max_spk_num` and `speech_ratio` are children of `filter_args`.
# Without indentation `filter_args` loads as null and its children become
# top-level keys. The nesting follows from the file's per-level alphabetical
# key order: the next top-level key, `gradient_clip_val`, sorts before all
# three of them.
filter_args:
  lang:
    - zh
    - en
  max_spk_num: 6
  speech_ratio: 0.6
# Top-level run settings (continued): clipping, inference and logging keys.
gradient_clip_val: 1.0
indexed_ds: true
infer: false
# Inference-related paths and names are explicit empty strings (not null).
infer_exp_name: ''
infer_json_path: ''
inference_ckpt: ''
inference_mode: nonstreaming
# Written with an explicit mantissa dot. YAML 1.1 loaders such as PyYAML only
# resolve exponent notation as a float when the mantissa contains a '.', so
# the bare form `1e-4` is loaded as the string '1e-4' instead of 0.0001.
learning_rate: 1.0e-4
limit_val_batches: 100
load_opt: false
log_interval: 10
logger_type: tensorboard
# The three keys below are children of `loss`. Without indentation `loss`
# loads as null and they become top-level keys. `mel_loss` cannot be a
# top-level key here: the following key, `max_epochs`, sorts before it in the
# file's alphabetical key order.
loss:
  lambda_fm: 1.0
  lambda_phone: 0.0
  mel_loss: l1
# Top-level limits and mel statistics: flat scalar keys.
max_epochs: 1000
# NOTE(review): -1 is used for several limits in this file; presumably it
# means "no limit" -- confirm against the consuming code.
max_eval_sentences: -1
max_eval_tokens: -1
max_prompt_ratio: 0.5
max_segment_cnt: 20000
max_sentences: -1
max_speech_duration: 20
max_tokens: 31250
# NOTE(review): `max_training_steps` (100000) and `max_updates` (160000)
# disagree; confirm which one the trainer honours.
max_training_steps: 100000
max_updates: 160000
# NOTE(review): presumably the statistics applied when `normalize_mel` (set
# to true in this file) is enabled -- confirm.
mel_mean: -4.479605
mel_std: 3.4584913
# Explicit null (unset), unlike the empty-string path keys in this file.
meta_dir: null
min_prompt_duration: 0.5
min_speech_duration: -1
# Model hyperparameters. Nesting is restored here: without indentation
# `model`, `dit`, `chunk_params`, `rope_params` and `phone_predictor` all load
# as null and every child becomes a top-level key.
#
# The structure follows from the per-level alphabetical key order: a key that
# sorts before its predecessor must close a nested mapping. Two placements are
# NOT fully determined by sort order and were chosen by meaning:
#   - `semantic_cfg_dropout` .. `use_rope` are placed under `dit`, not
#     `rope_params`.
#   - `position_id_start_from` .. `use_condition_prenet` are placed under
#     `model`, not `phone_predictor`.
# TODO(review): confirm both against the model constructor.
model:
  condition_prenet_depth: 6
  dit:
    chunk_params:
      hz: 50
      max_chunk: 3.0
      max_chunk_history: 50000000
      min_chunk: 0.5
      need_block_shift: false
    condition_input_dim: 1280
    condition_type: discrete_codes
    depth: 16
    # NOTE(review): `gleu_tanh` looks like a transposition of `gelu_tanh`.
    # Left unchanged because the values accepted by the consumer are not
    # visible here -- confirm.
    ffn_act_layer: gleu_tanh
    ffn_conv_kernel_size: 5
    ffn_gated_glu: false
    ffn_type: vanilla_mlp
    hidden_size: 2304
    input_size: 80
    max_seq_len: 4096
    mlp_ratio: 4.0
    num_heads: 18
    position_embedding_type: skip
    prompt_cfg_dropout: 0.2
    rope_params:
      max_position_embeddings: 4096
      rope_base: 10000.0
      rope_interpolation_factor: 1.0
    semantic_cfg_dropout: 0.2
    semantic_vocab_size: 16384
    use_chunk_setting: true
    use_rope: true
  phone_predictor:
    blank_id: 4
    phone_vocab_size: 5000
  position_id_start_from: 0
  random_position_start: true
  restart_position_ids: false
  use_condition_prenet: false
# Top-level data, optimiser, checkpointing and logging settings: flat scalar
# keys.
need_merge_same_speaker: true
need_precise_phones: false
# NOTE(review): the key name looks like a typo of `no_overlap`. Left as is
# because the consuming code reads this exact spelling -- confirm before
# renaming.
no_verlap: true
normalize_mel: true
num_nodes: 1
num_sanity_val_steps: 0
num_workers: 1
ode_steps: 150
optimizer_adam_beta1: 0.9
optimizer_adam_beta2: 0.98
optimizer_class: adamw
pin_memory: true
precision: bf16-mixed
save_interval: 2000
save_topk: 10
seed: 1234
shuffle: true
sort_by_len: true
src_sample_rate: 16000
strategy: ddp
tensorboard_dir: tb_logs
test_num: 100
tgt_sample_rate: 24000
timescale: 80000
# NOTE(review): `cfg_init`, `cfg_scale` and `cfg_schedule` are set in this
# file while `use_cfg` is false; presumably they are ignored in that case --
# confirm.
use_cfg: false
use_cfg_rescale: false
use_distributed_sampler: false
use_uncondition: false
# NOTE(review): 2000000 exceeds both `max_updates` (160000) and
# `max_training_steps` (100000) in this file, so if this is a step interval
# it would never be reached -- confirm intent.
val_check_interval: 2000000
# Explicit empty string (not null).
vocoder_ckpt: ''
# NOTE(review): `logger_type` is `tensorboard` in this file, so this W&B run
# name is presumably unused -- confirm.
wandb_name: glm4_semantic_cfm_v2_debug
warmup_updates: 100
weight_decay: 0.0001