Priyanship committed on
Commit
c19f0dd
·
verified ·
1 Parent(s): c1126a2

Training in progress, epoch 1

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/m/triton/scratch/elec/puhe/p/palp3/MUCS/commonvoice_with_xlsr/wav2vec2-xls-r-300m",
3
  "activation_dropout": 0.0,
4
  "adapter_attn_dim": null,
5
  "adapter_kernel_size": 3,
@@ -42,7 +42,7 @@
42
  2,
43
  2
44
  ],
45
- "ctc_loss_reduction": "mean",
46
  "ctc_zero_infinity": false,
47
  "diversity_loss_weight": 0.1,
48
  "do_stable_layer_norm": true,
@@ -59,13 +59,21 @@
59
  "initializer_range": 0.02,
60
  "intermediate_size": 4096,
61
  "layer_norm_eps": 1e-05,
62
- "layerdrop": 0.0,
 
 
 
 
 
63
  "mask_feature_length": 10,
64
  "mask_feature_min_masks": 0,
65
  "mask_feature_prob": 0.0,
66
  "mask_time_length": 10,
67
  "mask_time_min_masks": 2,
68
- "mask_time_prob": 0.05,
 
 
 
69
  "model_type": "wav2vec2",
70
  "num_adapter_layers": 3,
71
  "num_attention_heads": 16,
@@ -77,7 +85,7 @@
77
  "num_hidden_layers": 24,
78
  "num_negatives": 100,
79
  "output_hidden_size": 1024,
80
- "pad_token_id": 148,
81
  "proj_codevector_dim": 768,
82
  "tdnn_dilation": [
83
  1,
@@ -101,8 +109,8 @@
101
  1
102
  ],
103
  "torch_dtype": "float32",
104
- "transformers_version": "4.43.1",
105
  "use_weighted_layer_sum": false,
106
- "vocab_size": 151,
107
  "xvector_output_dim": 512
108
  }
 
1
  {
2
+ "_name_or_path": "/scratch/elec/puhe/p/palp3/sami_ASR/downloaded-large-sami/wav2vec2-large-sami-22k",
3
  "activation_dropout": 0.0,
4
  "adapter_attn_dim": null,
5
  "adapter_kernel_size": 3,
 
42
  2,
43
  2
44
  ],
45
+ "ctc_loss_reduction": "sum",
46
  "ctc_zero_infinity": false,
47
  "diversity_loss_weight": 0.1,
48
  "do_stable_layer_norm": true,
 
59
  "initializer_range": 0.02,
60
  "intermediate_size": 4096,
61
  "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.1,
63
+ "mask_channel_length": 10,
64
+ "mask_channel_min_space": 1,
65
+ "mask_channel_other": 0.0,
66
+ "mask_channel_prob": 0.0,
67
+ "mask_channel_selection": "static",
68
  "mask_feature_length": 10,
69
  "mask_feature_min_masks": 0,
70
  "mask_feature_prob": 0.0,
71
  "mask_time_length": 10,
72
  "mask_time_min_masks": 2,
73
+ "mask_time_min_space": 1,
74
+ "mask_time_other": 0.0,
75
+ "mask_time_prob": 0.075,
76
+ "mask_time_selection": "static",
77
  "model_type": "wav2vec2",
78
  "num_adapter_layers": 3,
79
  "num_attention_heads": 16,
 
85
  "num_hidden_layers": 24,
86
  "num_negatives": 100,
87
  "output_hidden_size": 1024,
88
+ "pad_token_id": 0,
89
  "proj_codevector_dim": 768,
90
  "tdnn_dilation": [
91
  1,
 
109
  1
110
  ],
111
  "torch_dtype": "float32",
112
+ "transformers_version": "4.48.3",
113
  "use_weighted_layer_sum": false,
114
+ "vocab_size": 46,
115
  "xvector_output_dim": 512
116
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7e054683214adacbdf0024a70815feace4a1bc7c6b8b0fc74b5155880272744
3
- size 1262426580
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:319752d98f832a046929197c9252fd2d091f9e8b2d805e7aade17a58fc9639c6
3
+ size 1261996080
special_tokens_map.json CHANGED
@@ -1,6 +1,30 @@
1
  {
2
- "bos_token": "<s>",
3
- "eos_token": "</s>",
4
- "pad_token": "[PAD]",
5
- "unk_token": "[UNK]"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  }
 
1
  {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": true,
5
+ "normalized": false,
6
+ "rstrip": true,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": true,
12
+ "normalized": false,
13
+ "rstrip": true,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<pad>",
18
+ "lstrip": true,
19
+ "normalized": false,
20
+ "rstrip": true,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": true,
28
+ "single_word": false
29
+ }
30
  }
tokenizer_config.json CHANGED
@@ -1,48 +1,49 @@
1
  {
2
  "added_tokens_decoder": {
3
- "147": {
4
- "content": "[UNK]",
5
  "lstrip": true,
6
  "normalized": false,
7
  "rstrip": true,
8
  "single_word": false,
9
  "special": false
10
  },
11
- "148": {
12
- "content": "[PAD]",
13
  "lstrip": true,
14
  "normalized": false,
15
  "rstrip": true,
16
  "single_word": false,
17
  "special": false
18
  },
19
- "149": {
20
- "content": "<s>",
21
- "lstrip": false,
22
  "normalized": false,
23
- "rstrip": false,
24
  "single_word": false,
25
- "special": true
26
  },
27
- "150": {
28
- "content": "</s>",
29
- "lstrip": false,
30
  "normalized": false,
31
- "rstrip": false,
32
  "single_word": false,
33
- "special": true
34
  }
35
  },
36
  "bos_token": "<s>",
37
  "clean_up_tokenization_spaces": true,
38
  "do_lower_case": false,
39
  "eos_token": "</s>",
 
40
  "model_max_length": 1000000000000000019884624838656,
41
- "pad_token": "[PAD]",
42
  "processor_class": "Wav2Vec2Processor",
43
  "replace_word_delimiter_char": " ",
44
  "target_lang": null,
45
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
46
- "unk_token": "[UNK]",
47
  "word_delimiter_token": "|"
48
  }
 
1
  {
2
  "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
  "lstrip": true,
6
  "normalized": false,
7
  "rstrip": true,
8
  "single_word": false,
9
  "special": false
10
  },
11
+ "1": {
12
+ "content": "<s>",
13
  "lstrip": true,
14
  "normalized": false,
15
  "rstrip": true,
16
  "single_word": false,
17
  "special": false
18
  },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": true,
22
  "normalized": false,
23
+ "rstrip": true,
24
  "single_word": false,
25
+ "special": false
26
  },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": true,
30
  "normalized": false,
31
+ "rstrip": true,
32
  "single_word": false,
33
+ "special": false
34
  }
35
  },
36
  "bos_token": "<s>",
37
  "clean_up_tokenization_spaces": true,
38
  "do_lower_case": false,
39
  "eos_token": "</s>",
40
+ "extra_special_tokens": {},
41
  "model_max_length": 1000000000000000019884624838656,
42
+ "pad_token": "<pad>",
43
  "processor_class": "Wav2Vec2Processor",
44
  "replace_word_delimiter_char": " ",
45
  "target_lang": null,
46
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
47
+ "unk_token": "<unk>",
48
  "word_delimiter_token": "|"
49
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:06506a8c01770d72e91032d86a6d861bf0ea22c208e29e94261ac2099fedd049
3
- size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31db71451cfcf19c3e70545607fc065ba7fab32d28bee8cb16f2e2f67cb5861b
3
+ size 5432
vocab.json CHANGED
@@ -1,151 +1,48 @@
1
  {
2
- "!": 1,
3
- "#": 2,
4
- "$": 3,
5
- "%": 4,
6
- "&": 5,
7
- "*": 6,
8
- "+": 7,
9
- "/": 8,
10
- "0": 9,
11
- "1": 10,
12
- "2": 11,
13
- "3": 12,
14
- "4": 13,
15
- "5": 14,
16
- "6": 15,
17
- "7": 16,
18
- "8": 17,
19
- "9": 18,
20
- ":": 19,
21
- ";": 20,
22
- "<": 21,
23
- "=": 22,
24
- ">": 23,
25
- "@": 24,
26
- "[PAD]": 148,
27
- "[UNK]": 147,
28
- "\\": 25,
29
- "^": 26,
30
- "_": 27,
31
- "`": 28,
32
- "a": 29,
33
- "b": 30,
34
- "c": 31,
35
- "d": 32,
36
- "e": 33,
37
- "f": 34,
38
- "g": 35,
39
- "h": 36,
40
- "i": 37,
41
- "j": 38,
42
- "k": 39,
43
- "l": 40,
44
- "m": 41,
45
- "n": 42,
46
- "o": 43,
47
- "p": 44,
48
- "q": 45,
49
- "r": 46,
50
- "s": 47,
51
- "t": 48,
52
- "u": 49,
53
- "v": 50,
54
- "w": 51,
55
- "x": 52,
56
- "y": 53,
57
- "z": 54,
58
- "{": 55,
59
- "|": 0,
60
- "}": 56,
61
- "~": 57,
62
- "°": 58,
63
- "º": 59,
64
- "×": 60,
65
- "λ": 61,
66
- "μ": 62,
67
- "π": 63,
68
- "φ": 64,
69
- "ω": 65,
70
- "ँ": 66,
71
- "ं": 67,
72
- "ः": 68,
73
- "अ": 69,
74
- "आ": 70,
75
- "इ": 71,
76
- "ई": 72,
77
- "उ": 73,
78
- "ऊ": 74,
79
- "ऋ": 75,
80
- "ऍ": 76,
81
- "ए": 77,
82
- "ऐ": 78,
83
- "ऑ": 79,
84
- "ओ": 80,
85
- "औ": 81,
86
- "क": 82,
87
- "ख": 83,
88
- "ग": 84,
89
- "घ": 85,
90
- "च": 86,
91
- "छ": 87,
92
- "ज": 88,
93
- "झ": 89,
94
- "ञ": 90,
95
- "ट": 91,
96
- "ठ": 92,
97
- "ड": 93,
98
- "ढ": 94,
99
- "ण": 95,
100
- "त": 96,
101
- "थ": 97,
102
- "द": 98,
103
- "ध": 99,
104
- "न": 100,
105
- "ऩ": 101,
106
- "प": 102,
107
- "फ": 103,
108
- "ब": 104,
109
- "भ": 105,
110
- "म": 106,
111
- "य": 107,
112
- "र": 108,
113
- "ऱ": 109,
114
- "ल": 110,
115
- "व": 111,
116
- "श": 112,
117
- "ष": 113,
118
- "स": 114,
119
- "ह": 115,
120
- "़": 116,
121
- "ा": 117,
122
- "ि": 118,
123
- "ी": 119,
124
- "ु": 120,
125
- "ू": 121,
126
- "ृ": 122,
127
- "ॅ": 123,
128
- "े": 124,
129
- "ै": 125,
130
- "ॉ": 126,
131
- "ो": 127,
132
- "ौ": 128,
133
- "्": 129,
134
- "०": 130,
135
- "१": 131,
136
- "२": 132,
137
- "३": 133,
138
- "४": 134,
139
- "५": 135,
140
- "६": 136,
141
- "७": 137,
142
- "८": 138,
143
- "९": 139,
144
- "–": 140,
145
- "•": 141,
146
- "…": 142,
147
- "›": 143,
148
- "⅓": 144,
149
- "→": 145,
150
- "−": 146
151
  }
 
1
  {
2
+ "</s>": 2,
3
+ "<pad>": 0,
4
+ "<s>": 1,
5
+ "<unk>": 3,
6
+ "a": 16,
7
+ "b": 27,
8
+ "c": 28,
9
+ "d": 23,
10
+ "e": 19,
11
+ "f": 26,
12
+ "g": 25,
13
+ "h": 8,
14
+ "i": 9,
15
+ "j": 11,
16
+ "k": 21,
17
+ "l": 20,
18
+ "m": 18,
19
+ "n": 10,
20
+ "o": 7,
21
+ "p": 15,
22
+ "q": 31,
23
+ "r": 22,
24
+ "s": 17,
25
+ "t": 14,
26
+ "u": 6,
27
+ "v": 5,
28
+ "w": 30,
29
+ "x": 32,
30
+ "y": 13,
31
+ "z": 29,
32
+ "|": 4,
33
+ "á": 34,
34
+ "ä": 12,
35
+ "å": 33,
36
+ "æ": 35,
37
+ "é": 36,
38
+ "ï": 37,
39
+ "ö": 24,
40
+ "ø": 38,
41
+ "ü": 39,
42
+ "č": 40,
43
+ "đ": 41,
44
+ "ŋ": 42,
45
+ "š": 43,
46
+ "ŧ": 44,
47
+ "ž": 45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  }