qingzhengwang committed on
Commit
5b22864
·
1 Parent(s): 1c5b185

Update model

Browse files
Files changed (24) hide show
  1. README.md +310 -3
  2. exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/config.yaml +235 -0
  3. exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/accuracy.png +0 -0
  4. exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/backward_time.png +0 -0
  5. exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/class_loss.png +0 -0
  6. exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/clip.png +0 -0
  7. exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/forward_time.png +0 -0
  8. exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/geo_loss_all.png +0 -0
  9. exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/geo_loss_downstream.png +0 -0
  10. exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/gpu_max_cached_mem_GB.png +0 -0
  11. exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/grad_norm.png +0 -0
  12. exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/inter_geo_loss_layer32.png +0 -0
  13. exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/inter_geo_loss_layer36.png +0 -0
  14. exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/inter_geo_loss_layer40.png +0 -0
  15. exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/inter_geo_loss_layer44.png +0 -0
  16. exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/inter_geo_loss_mean.png +0 -0
  17. exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/iter_time.png +0 -0
  18. exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/loss.png +0 -0
  19. exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/loss_scale.png +0 -0
  20. exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/optim0_lr0.png +0 -0
  21. exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/optim_step_time.png +0 -0
  22. exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/train_time.png +0 -0
  23. exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/valid.accuracy.best.pth +3 -0
  24. meta.yaml +8 -0
README.md CHANGED
@@ -1,3 +1,310 @@
1
- ---
2
- license: cc-by-4.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - language-identification
6
+ language: multilingual
7
+ datasets:
8
+ - geolid
9
+ license: cc-by-4.0
10
+ ---
11
+
12
+ ## ESPnet2 LID model
13
+
14
+ ### `espnet/geolid_vl107only_independent_frozen`
15
+
16
+ This model was trained by Qingzheng-Wang using the geolid recipe in [espnet](https://github.com/espnet/espnet/).
17
+
18
+ ### Demo: How to use in ESPnet2
19
+
20
+ Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
21
+ if you haven't done that already.
22
+
23
+ ```bash
24
+ cd espnet
25
+ git checkout 77e4293952083b9e32bc19a5ddc19efe45e70e4a
26
+ pip install -e .
27
+ cd egs2/geolid/lid1
28
+ ./run.sh --skip_data_prep false --skip_train true --download_model espnet/geolid_vl107only_independent_frozen
29
+ ```
30
+
31
+
32
+
33
+ ## LID config
34
+
35
+ <details><summary>expand</summary>
36
+
37
+ ```
38
+ config: conf/voxlingua107_only/mms_ecapa_upcon_32_44_it0.4_independent_frozen.yaml
39
+ print_config: false
40
+ log_level: INFO
41
+ drop_last_iter: false
42
+ dry_run: false
43
+ iterator_type: category
44
+ valid_iterator_type: category
45
+ output_dir: exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw
46
+ ngpu: 1
47
+ seed: 3702
48
+ num_workers: 8
49
+ num_att_plot: 0
50
+ dist_backend: nccl
51
+ dist_init_method: env://
52
+ dist_world_size: null
53
+ dist_rank: null
54
+ local_rank: 0
55
+ dist_master_addr: null
56
+ dist_master_port: null
57
+ dist_launcher: null
58
+ multiprocessing_distributed: false
59
+ unused_parameters: true
60
+ sharded_ddp: false
61
+ use_deepspeed: false
62
+ deepspeed_config: null
63
+ gradient_as_bucket_view: true
64
+ ddp_comm_hook: null
65
+ cudnn_enabled: true
66
+ cudnn_benchmark: true
67
+ cudnn_deterministic: false
68
+ use_tf32: false
69
+ collect_stats: false
70
+ write_collected_feats: false
71
+ max_epoch: 30
72
+ patience: null
73
+ val_scheduler_criterion:
74
+ - valid
75
+ - loss
76
+ early_stopping_criterion:
77
+ - valid
78
+ - loss
79
+ - min
80
+ best_model_criterion:
81
+ - - valid
82
+ - accuracy
83
+ - max
84
+ keep_nbest_models: 2
85
+ nbest_averaging_interval: 0
86
+ grad_clip: 9999
87
+ grad_clip_type: 2.0
88
+ grad_noise: false
89
+ accum_grad: 2
90
+ no_forward_run: false
91
+ resume: true
92
+ train_dtype: float32
93
+ use_amp: true
94
+ log_interval: 100
95
+ use_matplotlib: true
96
+ use_tensorboard: true
97
+ create_graph_in_tensorboard: false
98
+ use_wandb: false
99
+ wandb_project: null
100
+ wandb_id: null
101
+ wandb_entity: null
102
+ wandb_name: null
103
+ wandb_model_log_interval: -1
104
+ detect_anomaly: false
105
+ use_adapter: false
106
+ adapter: lora
107
+ save_strategy: all
108
+ adapter_conf: {}
109
+ pretrain_path: null
110
+ init_param: []
111
+ ignore_init_mismatch: false
112
+ freeze_param: []
113
+ num_iters_per_epoch: 1000
114
+ batch_size: 20
115
+ valid_batch_size: null
116
+ batch_bins: 2880000
117
+ valid_batch_bins: null
118
+ category_sample_size: 10
119
+ upsampling_factor: 0.5
120
+ category_upsampling_factor: 0.5
121
+ dataset_upsampling_factor: 0.5
122
+ dataset_scaling_factor: 1.2
123
+ max_batch_size: 16
124
+ min_batch_size: 1
125
+ train_shape_file:
126
+ - exp_voxlingua107_only/lid_stats_16k/train/speech_shape
127
+ valid_shape_file:
128
+ - exp_voxlingua107_only/lid_stats_16k/valid/speech_shape
129
+ batch_type: catpow
130
+ language_upsampling_factor: 0.5
131
+ valid_batch_type: null
132
+ fold_length:
133
+ - 120000
134
+ sort_in_batch: descending
135
+ shuffle_within_batch: false
136
+ sort_batch: descending
137
+ multiple_iterator: false
138
+ chunk_length: 500
139
+ chunk_shift_ratio: 0.5
140
+ num_cache_chunks: 1024
141
+ chunk_excluded_key_prefixes: []
142
+ chunk_default_fs: null
143
+ chunk_max_abs_length: null
144
+ chunk_discard_short_samples: true
145
+ train_data_path_and_name_and_type:
146
+ - - dump/raw/train_voxlingua107_lang/wav.scp
147
+ - speech
148
+ - sound
149
+ - - dump/raw/train_voxlingua107_lang/utt2lang
150
+ - lid_labels
151
+ - text
152
+ valid_data_path_and_name_and_type:
153
+ - - dump/raw/dev_voxlingua107_lang/wav.scp
154
+ - speech
155
+ - sound
156
+ - - dump/raw/dev_voxlingua107_lang/utt2lang
157
+ - lid_labels
158
+ - text
159
+ multi_task_dataset: false
160
+ allow_variable_data_keys: false
161
+ max_cache_size: 0.0
162
+ max_cache_fd: 32
163
+ allow_multi_rates: false
164
+ valid_max_cache_size: null
165
+ exclude_weight_decay: false
166
+ exclude_weight_decay_conf: {}
167
+ optim: adam
168
+ optim_conf:
169
+ lr: 5.0e-06
170
+ betas:
171
+ - 0.9
172
+ - 0.98
173
+ scheduler: tristagelr
174
+ scheduler_conf:
175
+ max_steps: 30000
176
+ warmup_ratio: 0.3
177
+ hold_ratio: 0.2
178
+ decay_ratio: 0.5
179
+ init_lr_scale: 0.6
180
+ final_lr_scale: 0.1
181
+ init: null
182
+ use_preprocessor: true
183
+ input_size: null
184
+ target_duration: 3.0
185
+ lang2utt: dump/raw/train_voxlingua107_lang/lang2utt
186
+ lang_num: 107
187
+ sample_rate: 16000
188
+ num_eval: 10
189
+ rir_scp: ''
190
+ model: upstream_condition
191
+ model_conf:
192
+ lang2vec_conditioning_layers:
193
+ - 32
194
+ - 36
195
+ - 40
196
+ - 44
197
+ apply_intermediate_lang2vec_loss: true
198
+ apply_intermediate_lang2vec_condition: true
199
+ inter_lang2vec_loss_weight: 0.4
200
+ cutoff_gradient_from_backbone: true
201
+ cutoff_gradient_before_condproj: true
202
+ shared_conditioning_proj: false
203
+ frontend: s3prl_condition
204
+ frontend_conf:
205
+ frontend_conf:
206
+ upstream: hf_wav2vec2_condition
207
+ path_or_url: facebook/mms-1b
208
+ download_dir: ./hub
209
+ multilayer_feature: true
210
+ specaug: null
211
+ specaug_conf: {}
212
+ normalize: utterance_mvn
213
+ normalize_conf:
214
+ norm_vars: false
215
+ encoder: ecapa_tdnn
216
+ encoder_conf:
217
+ model_scale: 8
218
+ ndim: 512
219
+ output_size: 1536
220
+ pooling: chn_attn_stat
221
+ pooling_conf: {}
222
+ projector: rawnet3
223
+ projector_conf:
224
+ output_size: 192
225
+ encoder_condition: identity
226
+ encoder_condition_conf: {}
227
+ pooling_condition: chn_attn_stat
228
+ pooling_condition_conf: {}
229
+ projector_condition: rawnet3
230
+ projector_condition_conf: {}
231
+ preprocessor: lid
232
+ preprocessor_conf:
233
+ fix_duration: false
234
+ sample_rate: 16000
235
+ noise_apply_prob: 0.0
236
+ noise_info:
237
+ - - 1.0
238
+ - dump/raw/musan_speech.scp
239
+ - - 4
240
+ - 7
241
+ - - 13
242
+ - 20
243
+ - - 1.0
244
+ - dump/raw/musan_noise.scp
245
+ - - 1
246
+ - 1
247
+ - - 0
248
+ - 15
249
+ - - 1.0
250
+ - dump/raw/musan_music.scp
251
+ - - 1
252
+ - 1
253
+ - - 5
254
+ - 15
255
+ rir_apply_prob: 0.0
256
+ rir_scp: dump/raw/rirs.scp
257
+ use_lang2vec: true
258
+ lang2vec_type: geo
259
+ loss: aamsoftmax_sc_topk_lang2vec
260
+ loss_conf:
261
+ margin: 0.5
262
+ scale: 30
263
+ K: 3
264
+ mp: 0.06
265
+ k_top: 5
266
+ lang2vec_dim: 299
267
+ lang2vec_type: geo
268
+ lang2vec_weight: 0.2
269
+ required:
270
+ - output_dir
271
+ version: '202506'
272
+ distributed: false
273
+ ```
274
+
275
+ </details>
276
+
277
+
278
+
279
+ ### Citing ESPnet
280
+
281
+ ```bibtex
282
+ @inproceedings{watanabe2018espnet,
283
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
284
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
285
+ year={2018},
286
+ booktitle={Proceedings of Interspeech},
287
+ pages={2207--2211},
288
+ doi={10.21437/Interspeech.2018-1456},
289
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
290
+ }
291
+
292
+
293
+
294
+
295
+
296
+
297
+ ```
298
+
299
+ or arXiv:
300
+
301
+ ```bibtex
302
+ @misc{watanabe2018espnet,
303
+ title={ESPnet: End-to-End Speech Processing Toolkit},
304
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
305
+ year={2018},
306
+ eprint={1804.00015},
307
+ archivePrefix={arXiv},
308
+ primaryClass={cs.CL}
309
+ }
310
+ ```
exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/config.yaml ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/voxlingua107_only/mms_ecapa_upcon_32_44_it0.4_independent_frozen.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: false
5
+ dry_run: false
6
+ iterator_type: category
7
+ valid_iterator_type: category
8
+ output_dir: exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw
9
+ ngpu: 1
10
+ seed: 3702
11
+ num_workers: 8
12
+ num_att_plot: 0
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: null
16
+ dist_rank: null
17
+ local_rank: 0
18
+ dist_master_addr: null
19
+ dist_master_port: null
20
+ dist_launcher: null
21
+ multiprocessing_distributed: false
22
+ unused_parameters: true
23
+ sharded_ddp: false
24
+ use_deepspeed: false
25
+ deepspeed_config: null
26
+ gradient_as_bucket_view: true
27
+ ddp_comm_hook: null
28
+ cudnn_enabled: true
29
+ cudnn_benchmark: true
30
+ cudnn_deterministic: false
31
+ use_tf32: false
32
+ collect_stats: false
33
+ write_collected_feats: false
34
+ max_epoch: 30
35
+ patience: null
36
+ val_scheduler_criterion:
37
+ - valid
38
+ - loss
39
+ early_stopping_criterion:
40
+ - valid
41
+ - loss
42
+ - min
43
+ best_model_criterion:
44
+ - - valid
45
+ - accuracy
46
+ - max
47
+ keep_nbest_models: 2
48
+ nbest_averaging_interval: 0
49
+ grad_clip: 9999
50
+ grad_clip_type: 2.0
51
+ grad_noise: false
52
+ accum_grad: 2
53
+ no_forward_run: false
54
+ resume: true
55
+ train_dtype: float32
56
+ use_amp: true
57
+ log_interval: 100
58
+ use_matplotlib: true
59
+ use_tensorboard: true
60
+ create_graph_in_tensorboard: false
61
+ use_wandb: false
62
+ wandb_project: null
63
+ wandb_id: null
64
+ wandb_entity: null
65
+ wandb_name: null
66
+ wandb_model_log_interval: -1
67
+ detect_anomaly: false
68
+ use_adapter: false
69
+ adapter: lora
70
+ save_strategy: all
71
+ adapter_conf: {}
72
+ pretrain_path: null
73
+ init_param: []
74
+ ignore_init_mismatch: false
75
+ freeze_param: []
76
+ num_iters_per_epoch: 1000
77
+ batch_size: 20
78
+ valid_batch_size: null
79
+ batch_bins: 2880000
80
+ valid_batch_bins: null
81
+ category_sample_size: 10
82
+ upsampling_factor: 0.5
83
+ category_upsampling_factor: 0.5
84
+ dataset_upsampling_factor: 0.5
85
+ dataset_scaling_factor: 1.2
86
+ max_batch_size: 16
87
+ min_batch_size: 1
88
+ train_shape_file:
89
+ - exp_voxlingua107_only/lid_stats_16k/train/speech_shape
90
+ valid_shape_file:
91
+ - exp_voxlingua107_only/lid_stats_16k/valid/speech_shape
92
+ batch_type: catpow
93
+ language_upsampling_factor: 0.5
94
+ valid_batch_type: null
95
+ fold_length:
96
+ - 120000
97
+ sort_in_batch: descending
98
+ shuffle_within_batch: false
99
+ sort_batch: descending
100
+ multiple_iterator: false
101
+ chunk_length: 500
102
+ chunk_shift_ratio: 0.5
103
+ num_cache_chunks: 1024
104
+ chunk_excluded_key_prefixes: []
105
+ chunk_default_fs: null
106
+ chunk_max_abs_length: null
107
+ chunk_discard_short_samples: true
108
+ train_data_path_and_name_and_type:
109
+ - - dump/raw/train_voxlingua107_lang/wav.scp
110
+ - speech
111
+ - sound
112
+ - - dump/raw/train_voxlingua107_lang/utt2lang
113
+ - lid_labels
114
+ - text
115
+ valid_data_path_and_name_and_type:
116
+ - - dump/raw/dev_voxlingua107_lang/wav.scp
117
+ - speech
118
+ - sound
119
+ - - dump/raw/dev_voxlingua107_lang/utt2lang
120
+ - lid_labels
121
+ - text
122
+ multi_task_dataset: false
123
+ allow_variable_data_keys: false
124
+ max_cache_size: 0.0
125
+ max_cache_fd: 32
126
+ allow_multi_rates: false
127
+ valid_max_cache_size: null
128
+ exclude_weight_decay: false
129
+ exclude_weight_decay_conf: {}
130
+ optim: adam
131
+ optim_conf:
132
+ lr: 5.0e-06
133
+ betas:
134
+ - 0.9
135
+ - 0.98
136
+ scheduler: tristagelr
137
+ scheduler_conf:
138
+ max_steps: 30000
139
+ warmup_ratio: 0.3
140
+ hold_ratio: 0.2
141
+ decay_ratio: 0.5
142
+ init_lr_scale: 0.6
143
+ final_lr_scale: 0.1
144
+ init: null
145
+ use_preprocessor: true
146
+ input_size: null
147
+ target_duration: 3.0
148
+ lang2utt: dump/raw/train_voxlingua107_lang/lang2utt
149
+ lang_num: 107
150
+ sample_rate: 16000
151
+ num_eval: 10
152
+ rir_scp: ''
153
+ model: upstream_condition
154
+ model_conf:
155
+ lang2vec_conditioning_layers:
156
+ - 32
157
+ - 36
158
+ - 40
159
+ - 44
160
+ apply_intermediate_lang2vec_loss: true
161
+ apply_intermediate_lang2vec_condition: true
162
+ inter_lang2vec_loss_weight: 0.4
163
+ cutoff_gradient_from_backbone: true
164
+ cutoff_gradient_before_condproj: true
165
+ shared_conditioning_proj: false
166
+ frontend: s3prl_condition
167
+ frontend_conf:
168
+ frontend_conf:
169
+ upstream: hf_wav2vec2_condition
170
+ path_or_url: facebook/mms-1b
171
+ download_dir: ./hub
172
+ multilayer_feature: true
173
+ specaug: null
174
+ specaug_conf: {}
175
+ normalize: utterance_mvn
176
+ normalize_conf:
177
+ norm_vars: false
178
+ encoder: ecapa_tdnn
179
+ encoder_conf:
180
+ model_scale: 8
181
+ ndim: 512
182
+ output_size: 1536
183
+ pooling: chn_attn_stat
184
+ pooling_conf: {}
185
+ projector: rawnet3
186
+ projector_conf:
187
+ output_size: 192
188
+ encoder_condition: identity
189
+ encoder_condition_conf: {}
190
+ pooling_condition: chn_attn_stat
191
+ pooling_condition_conf: {}
192
+ projector_condition: rawnet3
193
+ projector_condition_conf: {}
194
+ preprocessor: lid
195
+ preprocessor_conf:
196
+ fix_duration: false
197
+ sample_rate: 16000
198
+ noise_apply_prob: 0.0
199
+ noise_info:
200
+ - - 1.0
201
+ - dump/raw/musan_speech.scp
202
+ - - 4
203
+ - 7
204
+ - - 13
205
+ - 20
206
+ - - 1.0
207
+ - dump/raw/musan_noise.scp
208
+ - - 1
209
+ - 1
210
+ - - 0
211
+ - 15
212
+ - - 1.0
213
+ - dump/raw/musan_music.scp
214
+ - - 1
215
+ - 1
216
+ - - 5
217
+ - 15
218
+ rir_apply_prob: 0.0
219
+ rir_scp: dump/raw/rirs.scp
220
+ use_lang2vec: true
221
+ lang2vec_type: geo
222
+ loss: aamsoftmax_sc_topk_lang2vec
223
+ loss_conf:
224
+ margin: 0.5
225
+ scale: 30
226
+ K: 3
227
+ mp: 0.06
228
+ k_top: 5
229
+ lang2vec_dim: 299
230
+ lang2vec_type: geo
231
+ lang2vec_weight: 0.2
232
+ required:
233
+ - output_dir
234
+ version: '202506'
235
+ distributed: false
exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/accuracy.png ADDED
exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/backward_time.png ADDED
exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/class_loss.png ADDED
exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/clip.png ADDED
exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/forward_time.png ADDED
exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/geo_loss_all.png ADDED
exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/geo_loss_downstream.png ADDED
exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/gpu_max_cached_mem_GB.png ADDED
exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/grad_norm.png ADDED
exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/inter_geo_loss_layer32.png ADDED
exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/inter_geo_loss_layer36.png ADDED
exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/inter_geo_loss_layer40.png ADDED
exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/inter_geo_loss_layer44.png ADDED
exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/inter_geo_loss_mean.png ADDED
exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/iter_time.png ADDED
exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/loss.png ADDED
exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/loss_scale.png ADDED
exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/optim0_lr0.png ADDED
exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/optim_step_time.png ADDED
exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/images/train_time.png ADDED
exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/valid.accuracy.best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43ab083a61f2c0c48bde60872b9eb67870c8206f9fdc2c2d463c992ea278a8bd
3
+ size 3913686815
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: '202506'
2
+ files:
3
+ model_file: exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/valid.accuracy.best.pth
4
+ python: 3.11.8 | packaged by conda-forge | (main, Feb 16 2024, 20:53:32) [GCC 12.3.0]
5
+ timestamp: 1755589300.416241
6
+ torch: 2.4.0+cu118
7
+ yaml_files:
8
+ train_config: exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_independent_frozen_raw/config.yaml