bomolopuu committed on
Commit
a0d84ab
·
verified ·
1 Parent(s): 9445e2f

Training in progress, step 84

Browse files
.ipynb_checkpoints/Untitled-checkpoint.ipynb ADDED
@@ -0,0 +1,282 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "de57d37b-73e7-40aa-a922-7c4a9fbc8085",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "Обработка тренировочного датасета...\n",
14
+ "Загружаем список файлов из ngn/train_big.tsv\n",
15
+ "Проверяем размеры 1120 файлов (ищем файлы < 1.0MB)...\n",
16
+ "Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/02012020/995_02012020.wav (2.06MB > 1.0MB)\n",
17
+ "Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/26122019/110.wav, размер: 0.79MB\n",
18
+ "Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/27122019/321untitled.wav, размер: 0.43MB\n",
19
+ "Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1448untitled.wav, размер: 0.55MB\n",
20
+ "Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1866_10012020.wav, размер: 0.22MB\n",
21
+ "Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/12012020/1986untitled.wav, размер: 0.32MB\n",
22
+ "Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/30122019/658untitled.wav (1.26MB > 1.0MB)\n",
23
+ "Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1064untitled.wav (1.52MB > 1.0MB)\n",
24
+ "Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1098untitled.wav (1.09MB > 1.0MB)\n",
25
+ "Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/26122019/116.wav (1.17MB > 1.0MB)\n",
26
+ "Проверено 100/1120 файлов...\n",
27
+ "Проверено 200/1120 файлов...\n"
28
+ ]
29
+ }
30
+ ],
31
+ "source": [
32
+ "import os\n",
33
+ "from google.cloud import storage\n",
34
+ "import pandas as pd\n",
35
+ "from typing import Tuple, List, Dict\n",
36
+ "import io\n",
37
+ "\n",
38
+ "def check_blob_exists_and_size(bucket, possible_paths: List[str]) -> Tuple[bool, str, float]:\n",
39
+ " \"\"\"\n",
40
+ " Проверяет существование файла по всем возможным путям и его размер\n",
41
+ " Returns:\n",
42
+ " Tuple[bool, str, float]: (найден ли файл, путь к файлу, размер в MB)\n",
43
+ " \"\"\"\n",
44
+ " for path in possible_paths:\n",
45
+ " blob = bucket.blob(path)\n",
46
+ " try:\n",
47
+ " if blob.exists():\n",
48
+ " blob.reload() # Загружаем метаданные\n",
49
+ " size_mb = blob.size / (1024 * 1024)\n",
50
+ " return True, path, size_mb\n",
51
+ " except Exception as e:\n",
52
+ " print(f\"Ошибка при проверке файла {path}: {str(e)}\")\n",
53
+ " continue\n",
54
+ " return False, \"\", 0.0\n",
55
+ "\n",
56
+ "def generate_possible_paths(original_path: str, clips_prefix: str) -> List[str]:\n",
57
+ " \"\"\"\n",
58
+ " Генерирует все возможные варианты пути к файлу с учетом префикса\n",
59
+ " \"\"\"\n",
60
+ " parts = original_path.split('/')\n",
61
+ " if len(parts) < 2:\n",
62
+ " return []\n",
63
+ " current_folder = parts[-2] # получаем название текущей папки\n",
64
+ " filename = parts[-1] # получаем имя файла с расширением\n",
65
+ " base_dir = '/'.join(parts[:-1]) # получаем путь к директории\n",
66
+ "\n",
67
+ " # Разделяем имя файла на имя и расширение\n",
68
+ " name, extension = os.path.splitext(filename)\n",
69
+ " \n",
70
+ " # Разделяем имя по знаку подчеркивания и берем левую часть\n",
71
+ " base_name = name.split('_')[0]\n",
72
+ " \n",
73
+ " # Формируем все возможные варианты имен файлов\n",
74
+ " possible_names = [\n",
75
+ " f\"{base_name}{extension}\", # просто имя с расширением\n",
76
+ " f\"{base_name}_{current_folder}{extension}\", # имя + текущая папка\n",
77
+ " f\"{base_name}untitled{extension}\" # имя + untitled\n",
78
+ " ]\n",
79
+ " \n",
80
+ " # Формируем полные пути с префиксом\n",
81
+ " possible_paths = [f\"{clips_prefix}/{base_dir}/{name}\" for name in possible_names]\n",
82
+ " \n",
83
+ " return list(set(possible_paths)) # удаляем возможные дубликаты\n",
84
+ "\n",
85
+ "def check_all_files_sizes(bucket_name: str, tsv_path: str, clips_prefix: str, max_size_mb: float = 1.0) -> Dict[str, Tuple[str, float]]:\n",
86
+ " \"\"\"\n",
87
+ " Проверяет размеры всех файлов и возвращает словарь с информацией о файлах меньше max_size_mb\n",
88
+ " \"\"\"\n",
89
+ " storage_client = storage.Client()\n",
90
+ " bucket = storage_client.bucket(bucket_name)\n",
91
+ " \n",
92
+ " print(f\"Загружаем список файлов из {tsv_path}\")\n",
93
+ " blob = bucket.blob(tsv_path)\n",
94
+ " content = blob.download_as_string()\n",
95
+ " df = pd.read_csv(io.BytesIO(content), sep='\\t')\n",
96
+ " \n",
97
+ " valid_files = {}\n",
98
+ " missing_files = []\n",
99
+ " oversized_files = []\n",
100
+ " \n",
101
+ " total_files = len(df)\n",
102
+ " print(f\"Проверяем размеры {total_files} файлов (ищем файлы < {max_size_mb}MB)...\")\n",
103
+ " \n",
104
+ " # Статистика по размерам\n",
105
+ " size_stats = {\n",
106
+ " '0-0.5MB': 0,\n",
107
+ " '0.5-1MB': 0,\n",
108
+ " '1-1.5MB': 0,\n",
109
+ " '1.5-2MB': 0,\n",
110
+ " '>2MB': 0\n",
111
+ " }\n",
112
+ " \n",
113
+ " # Проверяем каждый файл\n",
114
+ " for idx, row in df.iterrows():\n",
115
+ " if 'path' not in row or pd.isna(row['path']):\n",
116
+ " continue\n",
117
+ " \n",
118
+ " original_path = row['path'].strip()\n",
119
+ " possible_paths = generate_possible_paths(original_path, clips_prefix)\n",
120
+ " exists, real_path, size_mb = check_blob_exists_and_size(bucket, possible_paths)\n",
121
+ " \n",
122
+ " if exists:\n",
123
+ " # Обновляем статистику по размерам\n",
124
+ " if size_mb > 2:\n",
125
+ " size_stats['>2MB'] += 1\n",
126
+ " elif size_mb > 1.5:\n",
127
+ " size_stats['1.5-2MB'] += 1\n",
128
+ " elif size_mb > 1:\n",
129
+ " size_stats['1-1.5MB'] += 1\n",
130
+ " elif size_mb > 0.5:\n",
131
+ " size_stats['0.5-1MB'] += 1\n",
132
+ " else:\n",
133
+ " size_stats['0-0.5MB'] += 1\n",
134
+ " \n",
135
+ " if size_mb < max_size_mb:\n",
136
+ " valid_files[original_path] = (real_path, size_mb)\n",
137
+ " if len(valid_files) <= 5:\n",
138
+ " print(f\"Подходящий файл найден: {real_path}, размер: {size_mb:.2f}MB\")\n",
139
+ " else:\n",
140
+ " oversized_files.append((original_path, size_mb))\n",
141
+ " if len(oversized_files) <= 5:\n",
142
+ " print(f\"Файл слишком большой: {real_path} ({size_mb:.2f}MB > {max_size_mb}MB)\")\n",
143
+ " else:\n",
144
+ " missing_files.append(original_path)\n",
145
+ " \n",
146
+ " if (idx + 1) % 100 == 0:\n",
147
+ " print(f\"Проверено {idx + 1}/{total_files} файлов...\")\n",
148
+ " \n",
149
+ " # Выводим статистику\n",
150
+ " print(\"\\nРезультаты проверки размеров:\")\n",
151
+ " print(f\"Всего файлов: {total_files}\")\n",
152
+ " print(f\"Файлов не найдено: {len(missing_files)}\")\n",
153
+ " print(f\"\\nРаспределение по размерам:\")\n",
154
+ " for size_range, count in size_stats.items():\n",
155
+ " print(f\"{size_range}: {count} файлов\")\n",
156
+ " \n",
157
+ " return valid_files, missing_files, oversized_files\n",
158
+ "\n",
159
+ "def create_filtered_dataset(bucket_name: str, tsv_path: str, clips_prefix: str, max_size_mb: float = 1.0) -> pd.DataFrame:\n",
160
+ " \"\"\"\n",
161
+ " Создает датафрейм только с файлами подходящего размера\n",
162
+ " \"\"\"\n",
163
+ " # Проверяем размеры всех файлов\n",
164
+ " valid_files, missing_files, oversized_files = check_all_files_sizes(\n",
165
+ " bucket_name, tsv_path, clips_prefix, max_size_mb\n",
166
+ " )\n",
167
+ " \n",
168
+ " # Загружаем исходный датафрейм\n",
169
+ " storage_client = storage.Client()\n",
170
+ " bucket = storage_client.bucket(bucket_name)\n",
171
+ " blob = bucket.blob(tsv_path)\n",
172
+ " content = blob.download_as_string()\n",
173
+ " df = pd.read_csv(io.BytesIO(content), sep='\\t')\n",
174
+ " \n",
175
+ " # Очищаем датафрейм\n",
176
+ " df = df.dropna(subset=['path', 'sentence_normalized'])\n",
177
+ " df = df[\n",
178
+ " (df['path'].str.strip() != '') & \n",
179
+ " (df['sentence_normalized'].str.strip() != '')\n",
180
+ " ]\n",
181
+ " \n",
182
+ " # Фильтруем датафрейм\n",
183
+ " filtered_df = df[df['path'].isin(valid_files.keys())].copy()\n",
184
+ " \n",
185
+ " # Обновляем пути и создаем аудио колонку\n",
186
+ " filtered_df['path'] = filtered_df['path'].apply(lambda x: valid_files[x][0])\n",
187
+ " filtered_df['audio'] = filtered_df['path'].apply(lambda x: f\"gs://{bucket_name}/{x}\")\n",
188
+ " \n",
189
+ " # Создаем тройную транскрипцию\n",
190
+ " filtered_df['sentence_normalized'] = filtered_df['sentence_normalized'].apply(\n",
191
+ " lambda x: ' '.join([x.strip()] * 3)\n",
192
+ " )\n",
193
+ " \n",
194
+ " print(f\"\\nСоздан отфильтрованный датасет с {len(filtered_df)} записями\")\n",
195
+ " return filtered_df\n",
196
+ "\n",
197
+ "def process_datasets(bucket_name: str, train_tsv: str, test_tsv: str, clips_prefix: str, max_size_mb: float = 1.0):\n",
198
+ " \"\"\"\n",
199
+ " Обрабатывает тренировочный и тестовый датасеты\n",
200
+ " \"\"\"\n",
201
+ " print(\"Обработка тренировочного датасета...\")\n",
202
+ " filtered_train_df = create_filtered_dataset(\n",
203
+ " bucket_name=bucket_name,\n",
204
+ " tsv_path=train_tsv,\n",
205
+ " clips_prefix=clips_prefix,\n",
206
+ " max_size_mb=max_size_mb\n",
207
+ " )\n",
208
+ " \n",
209
+ " print(\"\\nОбработка тестового датасета...\")\n",
210
+ " filtered_test_df = create_filtered_dataset(\n",
211
+ " bucket_name=bucket_name,\n",
212
+ " tsv_path=test_tsv,\n",
213
+ " clips_prefix=clips_prefix,\n",
214
+ " max_size_mb=max_size_mb\n",
215
+ " )\n",
216
+ " \n",
217
+ " # Сохраняем результаты\n",
218
+ " filtered_train_df.to_csv('1filtered_train_dataset.tsv', sep='\\t', index=False)\n",
219
+ " filtered_test_df.to_csv('filtered_test_dataset.tsv', sep='\\t', index=False)\n",
220
+ " \n",
221
+ " return filtered_train_df, filtered_test_df\n",
222
+ "\n",
223
+ "if __name__ == \"__main__\":\n",
224
+ " # Обрабатываем оба датасета\n",
225
+ " filtered_train_df, filtered_test_df = process_datasets(\n",
226
+ " bucket_name='ngen_model_fine_tuned',\n",
227
+ " train_tsv='ngn/train_big.tsv',\n",
228
+ " test_tsv='ngn/test_big.tsv',\n",
229
+ " clips_prefix='ngn/clips',\n",
230
+ " max_size_mb=1.0 # ограничение в 1MB\n",
231
+ " )\n",
232
+ " \n",
233
+ "# # Создаем датасеты для huggingface\n",
234
+ "# from datasets import Dataset\n",
235
+ "# from datasets.features import Audio\n",
236
+ " \n",
237
+ "# train_dataset = Dataset.from_pandas(filtered_train_df)\n",
238
+ "# test_dataset = Dataset.from_pandas(filtered_test_df)\n",
239
+ "# print(\"Созданы датасеты\")\n",
240
+ " \n",
241
+ "# train_dataset = train_dataset.cast_column(\"audio\", Audio(sampling_rate=16000))\n",
242
+ "# test_dataset = test_dataset.cast_column(\"audio\", Audio(sampling_rate=16000))\n",
243
+ "# print(\"Колонки аудио преобразованы\")"
244
+ ]
245
+ },
246
+ {
247
+ "cell_type": "code",
248
+ "execution_count": null,
249
+ "id": "89853d1c-a7db-498b-bd45-bc6e72b86b4e",
250
+ "metadata": {},
251
+ "outputs": [],
252
+ "source": []
253
+ }
254
+ ],
255
+ "metadata": {
256
+ "environment": {
257
+ "kernel": "conda-base-py",
258
+ "name": "workbench-notebooks.m126",
259
+ "type": "gcloud",
260
+ "uri": "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/workbench-notebooks:m126"
261
+ },
262
+ "kernelspec": {
263
+ "display_name": "Python 3 (ipykernel) (Local)",
264
+ "language": "python",
265
+ "name": "conda-base-py"
266
+ },
267
+ "language_info": {
268
+ "codemirror_mode": {
269
+ "name": "ipython",
270
+ "version": 3
271
+ },
272
+ "file_extension": ".py",
273
+ "mimetype": "text/x-python",
274
+ "name": "python",
275
+ "nbconvert_exporter": "python",
276
+ "pygments_lexer": "ipython3",
277
+ "version": "3.10.15"
278
+ }
279
+ },
280
+ "nbformat": 4,
281
+ "nbformat_minor": 5
282
+ }
.ipynb_checkpoints/filtered_train_dataset-checkpoint.tsv ADDED
The diff for this file is too large to render. See raw diff
 
Untitled.ipynb ADDED
@@ -0,0 +1,332 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "de57d37b-73e7-40aa-a922-7c4a9fbc8085",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "Обработка тренировочного датасета...\n",
14
+ "Загружаем список файлов из ngn/train_big.tsv\n",
15
+ "Проверяем размеры 1120 файлов (ищем файлы < 1.0MB)...\n",
16
+ "Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/02012020/995_02012020.wav (2.06MB > 1.0MB)\n",
17
+ "Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/26122019/110.wav, размер: 0.79MB\n",
18
+ "Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/27122019/321untitled.wav, размер: 0.43MB\n",
19
+ "Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1448untitled.wav, размер: 0.55MB\n",
20
+ "Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1866_10012020.wav, размер: 0.22MB\n",
21
+ "Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/12012020/1986untitled.wav, размер: 0.32MB\n",
22
+ "Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/30122019/658untitled.wav (1.26MB > 1.0MB)\n",
23
+ "Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1064untitled.wav (1.52MB > 1.0MB)\n",
24
+ "Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1098untitled.wav (1.09MB > 1.0MB)\n",
25
+ "Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/26122019/116.wav (1.17MB > 1.0MB)\n",
26
+ "Проверено 100/1120 файлов...\n",
27
+ "Проверено 200/1120 файлов...\n",
28
+ "Проверено 400/1120 файлов...\n",
29
+ "Проверено 500/1120 файлов...\n",
30
+ "Проверено 600/1120 файлов...\n",
31
+ "Проверено 700/1120 файлов...\n",
32
+ "Проверено 800/1120 файлов...\n",
33
+ "Проверено 900/1120 файлов...\n",
34
+ "Проверено 1000/1120 файлов...\n",
35
+ "Проверено 1100/1120 файлов...\n",
36
+ "\n",
37
+ "Результаты проверки размеров:\n",
38
+ "Всего файлов: 1120\n",
39
+ "Файлов не найдено: 21\n",
40
+ "\n",
41
+ "Распределение по размерам:\n",
42
+ "0-0.5MB: 297 файлов\n",
43
+ "0.5-1MB: 364 файлов\n",
44
+ "1-1.5MB: 218 файлов\n",
45
+ "1.5-2MB: 65 файлов\n",
46
+ ">2MB: 63 файлов\n",
47
+ "\n",
48
+ "Создан отфильтрованный датасет с 660 записями\n",
49
+ "\n",
50
+ "Обработка тестового датасета...\n",
51
+ "Загружаем список файлов из ngn/test_big.tsv\n",
52
+ "Проверяем размеры 281 файлов (ищем файлы < 1.0MB)...\n",
53
+ "Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1832_10012020.wav, размер: 0.23MB\n",
54
+ "Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1632untitled.wav, размер: 0.68MB\n",
55
+ "Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/26122019/111.wav, размер: 0.79MB\n",
56
+ "Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/13012020/2021untitled.wav (1.07MB > 1.0MB)\n",
57
+ "Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1249untitled.wav, размер: 0.53MB\n",
58
+ "Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/30122019/800untitled.wav, размер: 0.49MB\n",
59
+ "Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/31122019/803untitled.wav (1.39MB > 1.0MB)\n",
60
+ "Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/25122019/033_25122019.wav (2.82MB > 1.0MB)\n",
61
+ "Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/02012020/997_02012020.wav (1.15MB > 1.0MB)\n",
62
+ "Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1455untitled.wav (1.40MB > 1.0MB)\n",
63
+ "Проверено 100/281 файлов...\n",
64
+ "Проверено 200/281 файлов...\n",
65
+ "\n",
66
+ "Результаты проверки размеров:\n",
67
+ "Всего файлов: 281\n",
68
+ "Файлов не найдено: 6\n",
69
+ "\n",
70
+ "Распределение по размерам:\n",
71
+ "0-0.5MB: 65 файлов\n",
72
+ "0.5-1MB: 102 файлов\n",
73
+ "1-1.5MB: 57 файлов\n",
74
+ "1.5-2MB: 12 файлов\n",
75
+ ">2MB: 13 файлов\n",
76
+ "\n",
77
+ "Создан отфильтрованный датасет с 167 записями\n"
78
+ ]
79
+ }
80
+ ],
81
+ "source": [
82
+ "import os\n",
83
+ "from google.cloud import storage\n",
84
+ "import pandas as pd\n",
85
+ "from typing import Tuple, List, Dict\n",
86
+ "import io\n",
87
+ "\n",
88
+ "def check_blob_exists_and_size(bucket, possible_paths: List[str]) -> Tuple[bool, str, float]:\n",
89
+ " \"\"\"\n",
90
+ " Проверяет существование файла по всем возможным путям и его размер\n",
91
+ " Returns:\n",
92
+ " Tuple[bool, str, float]: (найден ли файл, путь к файлу, размер в MB)\n",
93
+ " \"\"\"\n",
94
+ " for path in possible_paths:\n",
95
+ " blob = bucket.blob(path)\n",
96
+ " try:\n",
97
+ " if blob.exists():\n",
98
+ " blob.reload() # Загружаем метаданные\n",
99
+ " size_mb = blob.size / (1024 * 1024)\n",
100
+ " return True, path, size_mb\n",
101
+ " except Exception as e:\n",
102
+ " print(f\"Ошибка при проверке файла {path}: {str(e)}\")\n",
103
+ " continue\n",
104
+ " return False, \"\", 0.0\n",
105
+ "\n",
106
+ "def generate_possible_paths(original_path: str, clips_prefix: str) -> List[str]:\n",
107
+ " \"\"\"\n",
108
+ " Генерирует все возможные варианты пути к файлу с учетом префикса\n",
109
+ " \"\"\"\n",
110
+ " parts = original_path.split('/')\n",
111
+ " if len(parts) < 2:\n",
112
+ " return []\n",
113
+ " current_folder = parts[-2] # получаем название текущей папки\n",
114
+ " filename = parts[-1] # получаем имя файла с расширением\n",
115
+ " base_dir = '/'.join(parts[:-1]) # получаем путь к директории\n",
116
+ "\n",
117
+ " # Разделяем имя файла на имя и расширение\n",
118
+ " name, extension = os.path.splitext(filename)\n",
119
+ " \n",
120
+ " # Разделяем имя по знаку подчеркивания и берем левую часть\n",
121
+ " base_name = name.split('_')[0]\n",
122
+ " \n",
123
+ " # Формируем все возможные варианты имен файлов\n",
124
+ " possible_names = [\n",
125
+ " f\"{base_name}{extension}\", # просто имя с расширением\n",
126
+ " f\"{base_name}_{current_folder}{extension}\", # имя + текущая папка\n",
127
+ " f\"{base_name}untitled{extension}\" # имя + untitled\n",
128
+ " ]\n",
129
+ " \n",
130
+ " # Формируем полные пути с префиксом\n",
131
+ " possible_paths = [f\"{clips_prefix}/{base_dir}/{name}\" for name in possible_names]\n",
132
+ " \n",
133
+ " return list(set(possible_paths)) # удаляем возможные дубликаты\n",
134
+ "\n",
135
+ "def check_all_files_sizes(bucket_name: str, tsv_path: str, clips_prefix: str, max_size_mb: float = 1.0) -> Dict[str, Tuple[str, float]]:\n",
136
+ " \"\"\"\n",
137
+ " Проверяет размеры всех файлов и возвращает словарь с информацией о файлах меньше max_size_mb\n",
138
+ " \"\"\"\n",
139
+ " storage_client = storage.Client()\n",
140
+ " bucket = storage_client.bucket(bucket_name)\n",
141
+ " \n",
142
+ " print(f\"Загружаем список файлов из {tsv_path}\")\n",
143
+ " blob = bucket.blob(tsv_path)\n",
144
+ " content = blob.download_as_string()\n",
145
+ " df = pd.read_csv(io.BytesIO(content), sep='\\t')\n",
146
+ " \n",
147
+ " valid_files = {}\n",
148
+ " missing_files = []\n",
149
+ " oversized_files = []\n",
150
+ " \n",
151
+ " total_files = len(df)\n",
152
+ " print(f\"Проверяем размеры {total_files} файлов (ищем файлы < {max_size_mb}MB)...\")\n",
153
+ " \n",
154
+ " # Статистика по размерам\n",
155
+ " size_stats = {\n",
156
+ " '0-0.5MB': 0,\n",
157
+ " '0.5-1MB': 0,\n",
158
+ " '1-1.5MB': 0,\n",
159
+ " '1.5-2MB': 0,\n",
160
+ " '>2MB': 0\n",
161
+ " }\n",
162
+ " \n",
163
+ " # Проверяем каждый файл\n",
164
+ " for idx, row in df.iterrows():\n",
165
+ " if 'path' not in row or pd.isna(row['path']):\n",
166
+ " continue\n",
167
+ " \n",
168
+ " original_path = row['path'].strip()\n",
169
+ " possible_paths = generate_possible_paths(original_path, clips_prefix)\n",
170
+ " exists, real_path, size_mb = check_blob_exists_and_size(bucket, possible_paths)\n",
171
+ " \n",
172
+ " if exists:\n",
173
+ " # Обновляем статистику по размерам\n",
174
+ " if size_mb > 2:\n",
175
+ " size_stats['>2MB'] += 1\n",
176
+ " elif size_mb > 1.5:\n",
177
+ " size_stats['1.5-2MB'] += 1\n",
178
+ " elif size_mb > 1:\n",
179
+ " size_stats['1-1.5MB'] += 1\n",
180
+ " elif size_mb > 0.5:\n",
181
+ " size_stats['0.5-1MB'] += 1\n",
182
+ " else:\n",
183
+ " size_stats['0-0.5MB'] += 1\n",
184
+ " \n",
185
+ " if size_mb < max_size_mb:\n",
186
+ " valid_files[original_path] = (real_path, size_mb)\n",
187
+ " if len(valid_files) <= 5:\n",
188
+ " print(f\"Подходящий файл найден: {real_path}, размер: {size_mb:.2f}MB\")\n",
189
+ " else:\n",
190
+ " oversized_files.append((original_path, size_mb))\n",
191
+ " if len(oversized_files) <= 5:\n",
192
+ " print(f\"Файл слишком большой: {real_path} ({size_mb:.2f}MB > {max_size_mb}MB)\")\n",
193
+ " else:\n",
194
+ " missing_files.append(original_path)\n",
195
+ " \n",
196
+ " if (idx + 1) % 100 == 0:\n",
197
+ " print(f\"Проверено {idx + 1}/{total_files} файлов...\")\n",
198
+ " \n",
199
+ " # Выводим статистику\n",
200
+ " print(\"\\nРезультаты проверки размеров:\")\n",
201
+ " print(f\"Всего файлов: {total_files}\")\n",
202
+ " print(f\"Файлов не найдено: {len(missing_files)}\")\n",
203
+ " print(f\"\\nРаспределение по размерам:\")\n",
204
+ " for size_range, count in size_stats.items():\n",
205
+ " print(f\"{size_range}: {count} файлов\")\n",
206
+ " \n",
207
+ " return valid_files, missing_files, oversized_files\n",
208
+ "\n",
209
+ "def create_filtered_dataset(bucket_name: str, tsv_path: str, clips_prefix: str, max_size_mb: float = 1.0) -> pd.DataFrame:\n",
210
+ " \"\"\"\n",
211
+ " Создает датафрейм только с файлами подходящего размера\n",
212
+ " \"\"\"\n",
213
+ " # Проверяем размеры всех файлов\n",
214
+ " valid_files, missing_files, oversized_files = check_all_files_sizes(\n",
215
+ " bucket_name, tsv_path, clips_prefix, max_size_mb\n",
216
+ " )\n",
217
+ " \n",
218
+ " # Загружаем исходный датафрейм\n",
219
+ " storage_client = storage.Client()\n",
220
+ " bucket = storage_client.bucket(bucket_name)\n",
221
+ " blob = bucket.blob(tsv_path)\n",
222
+ " content = blob.download_as_string()\n",
223
+ " df = pd.read_csv(io.BytesIO(content), sep='\\t')\n",
224
+ " \n",
225
+ " # Очищаем датафрейм\n",
226
+ " df = df.dropna(subset=['path', 'sentence_normalized'])\n",
227
+ " df = df[\n",
228
+ " (df['path'].str.strip() != '') & \n",
229
+ " (df['sentence_normalized'].str.strip() != '')\n",
230
+ " ]\n",
231
+ " \n",
232
+ " # Фильтруем датафрейм\n",
233
+ " filtered_df = df[df['path'].isin(valid_files.keys())].copy()\n",
234
+ " \n",
235
+ " # Обновляем пути и создаем аудио колонку\n",
236
+ " filtered_df['path'] = filtered_df['path'].apply(lambda x: valid_files[x][0])\n",
237
+ " filtered_df['audio'] = filtered_df['path'].apply(lambda x: f\"gs://{bucket_name}/{x}\")\n",
238
+ " \n",
239
+ " # Создаем тройную транскрипцию\n",
240
+ " filtered_df['sentence_normalized'] = filtered_df['sentence_normalized'].apply(\n",
241
+ " lambda x: ' '.join([x.strip()] * 3)\n",
242
+ " )\n",
243
+ " \n",
244
+ " print(f\"\\nСоздан отфильтрованный датасет с {len(filtered_df)} записями\")\n",
245
+ " return filtered_df\n",
246
+ "\n",
247
+ "def process_datasets(bucket_name: str, train_tsv: str, test_tsv: str, clips_prefix: str, max_size_mb: float = 1.0):\n",
248
+ " \"\"\"\n",
249
+ " Обрабатывает тренировочный и тестовый датасеты\n",
250
+ " \"\"\"\n",
251
+ " print(\"Обработка тренировочного датасета...\")\n",
252
+ " filtered_train_df = create_filtered_dataset(\n",
253
+ " bucket_name=bucket_name,\n",
254
+ " tsv_path=train_tsv,\n",
255
+ " clips_prefix=clips_prefix,\n",
256
+ " max_size_mb=max_size_mb\n",
257
+ " )\n",
258
+ " \n",
259
+ " print(\"\\nОбработка тестового датасета...\")\n",
260
+ " filtered_test_df = create_filtered_dataset(\n",
261
+ " bucket_name=bucket_name,\n",
262
+ " tsv_path=test_tsv,\n",
263
+ " clips_prefix=clips_prefix,\n",
264
+ " max_size_mb=max_size_mb\n",
265
+ " )\n",
266
+ " \n",
267
+ " # Сохраняем результаты\n",
268
+ " filtered_train_df.to_csv('1filtered_train_dataset.tsv', sep='\\t', index=False)\n",
269
+ " filtered_test_df.to_csv('filtered_test_dataset.tsv', sep='\\t', index=False)\n",
270
+ " \n",
271
+ " return filtered_train_df, filtered_test_df\n",
272
+ "\n",
273
+ "if __name__ == \"__main__\":\n",
274
+ " # Обрабатываем оба датасета\n",
275
+ " filtered_train_df, filtered_test_df = process_datasets(\n",
276
+ " bucket_name='ngen_model_fine_tuned',\n",
277
+ " train_tsv='ngn/train_big.tsv',\n",
278
+ " test_tsv='ngn/test_big.tsv',\n",
279
+ " clips_prefix='ngn/clips',\n",
280
+ " max_size_mb=1.0 # ограничение в 1MB\n",
281
+ " )\n",
282
+ " \n",
283
+ "# # Создаем датасеты для huggingface\n",
284
+ "# from datasets import Dataset\n",
285
+ "# from datasets.features import Audio\n",
286
+ " \n",
287
+ "# train_dataset = Dataset.from_pandas(filtered_train_df)\n",
288
+ "# test_dataset = Dataset.from_pandas(filtered_test_df)\n",
289
+ "# print(\"Созданы датасеты\")\n",
290
+ " \n",
291
+ "# train_dataset = train_dataset.cast_column(\"audio\", Audio(sampling_rate=16000))\n",
292
+ "# test_dataset = test_dataset.cast_column(\"audio\", Audio(sampling_rate=16000))\n",
293
+ "# print(\"Колонки аудио преобразованы\")"
294
+ ]
295
+ },
296
+ {
297
+ "cell_type": "code",
298
+ "execution_count": null,
299
+ "id": "89853d1c-a7db-498b-bd45-bc6e72b86b4e",
300
+ "metadata": {},
301
+ "outputs": [],
302
+ "source": []
303
+ }
304
+ ],
305
+ "metadata": {
306
+ "environment": {
307
+ "kernel": "conda-base-py",
308
+ "name": "workbench-notebooks.m126",
309
+ "type": "gcloud",
310
+ "uri": "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/workbench-notebooks:m126"
311
+ },
312
+ "kernelspec": {
313
+ "display_name": "Python 3 (ipykernel) (Local)",
314
+ "language": "python",
315
+ "name": "conda-base-py"
316
+ },
317
+ "language_info": {
318
+ "codemirror_mode": {
319
+ "name": "ipython",
320
+ "version": 3
321
+ },
322
+ "file_extension": ".py",
323
+ "mimetype": "text/x-python",
324
+ "name": "python",
325
+ "nbconvert_exporter": "python",
326
+ "pygments_lexer": "ipython3",
327
+ "version": "3.10.15"
328
+ }
329
+ },
330
+ "nbformat": 4,
331
+ "nbformat_minor": 5
332
+ }
filtered_test_dataset.tsv ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Unnamed: 0 № Date Francaise sentence path File 2 File 3 Consultant (S= Sualio, Y=Yacou_what'sup Unnamed: 13 Unnamed: 14 Unnamed: 15 sentence_normalized audio
2
+ 1099 1832.0 2020-10-10 La poule va l a pique Mɛnɛnɛ be wo pa ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1832_10012020.wav S Mɛnɛnɛ be wo pa Mɛnɛnɛ be wo pa Mɛnɛnɛ be wo pa gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1832_10012020.wav
3
+ 948 1632.0 2020-01-08 Vous n'avez pas ka̋ nù pálá ɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1632untitled.wav S ka nu pala ɔ ka nu pala ɔ ka nu pala ɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1632untitled.wav
4
+ 49 111.0 2019-12-26 Je vois 1 chien ŋ̋ gɛ̋ŋ̋ dó yé ngn/clips/Audio_Ngen_2019_2020_2021/26122019/111.wav S ŋ gɛŋ do ye ŋ gɛŋ do ye ŋ gɛŋ do ye gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/26122019/111.wav
5
+ 706 1249.0 2020-01-04 10 bù ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1249untitled.wav S bu bu bu gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1249untitled.wav
6
+ 342 800.0 2019-12-30 riz màló ngn/clips/Audio_Ngen_2019_2020_2021/30122019/800untitled.wav S malo malo malo gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/30122019/800untitled.wav
7
+ 823 1468.0 2020-01-06 poitrine dìŋ gɔ́ ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1468untitled.wav S diŋ gɔ diŋ gɔ diŋ gɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1468untitled.wav
8
+ 306 747.0 2019-12-31 9 ve̋lì ti̋zì ngn/clips/Audio_Ngen_2019_2020_2021/31122019/747untitled.wav S veli tizi veli tizi veli tizi gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/31122019/747untitled.wav
9
+ 700 1243.0 2020-01-04 10 gɔ̋ bú ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1243untitled.wav S gɔ bu gɔ bu gɔ bu gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1243untitled.wav
10
+ 629 1157.0 2020-01-03 Vous envoyez Ka nu ba gba ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1157untitled.wav S Ka nu ba gba Ka nu ba gba Ka nu ba gba gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1157untitled.wav
11
+ 76 151.0 2019-12-26 cendres yépé ngn/clips/Audio_Ngen_2019_2020_2021/26122019/151.wav S yepe yepe yepe gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/26122019/151.wav
12
+ 247 662.0 2019-12-30 Leurs wȍ yȍrȍ pàlàŋ ngn/clips/Audio_Ngen_2019_2020_2021/30122019/662untitled.wav S wo yoro palaŋ wo yoro palaŋ wo yoro palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/30122019/662untitled.wav
13
+ 649 1178.0 2020-01-03 Maison de femme Lìŋ̀ pàlà ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1178untitled.wav S Liŋ pala Liŋ pala Liŋ pala gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1178untitled.wav
14
+ 429 947.0 2020-01-02 Ils vont Wò nűma̋ yȉ gbɛ̀ ngn/clips/Audio_Ngen_2019_2020_2021/02012020/947_02012020.wav S Wo numa yi gbɛ Wo numa yi gbɛ Wo numa yi gbɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/947_02012020.wav
15
+ 78 156.0 2019-12-26 trou yɛ̏rɛ̏ ngn/clips/Audio_Ngen_2019_2020_2021/26122019/156.wav S yɛrɛ yɛrɛ yɛrɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/26122019/156.wav
16
+ 626 1154.0 2020-01-03 T'envoye A nu na ba gbagbɛ ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1154untitled.wav S A nu na ba gbagbɛ A nu na ba gbagbɛ A nu na ba gbagbɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1154untitled.wav
17
+ 1074 1804.0 2020-01-09 mensonge kḭ́ná̰ ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1804untitled.wav S kina kina kina gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1804untitled.wav
18
+ 1077 1807.0 2020-01-09 vous kà ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1807untitled.wav S ka ka ka gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1807untitled.wav
19
+ 192 391.0 2019-12-28 Je vois 7 marmites ŋ̋ yőrő síénú yè ngn/clips/Audio_Ngen_2019_2020_2021/26122019/391untitled.wav S ŋ yoro sienu ye ŋ yoro sienu ye ŋ yoro sienu ye gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/26122019/391untitled.wav
20
+ 350 809.0 2019-12-31 9 branche gùŋ ti̋zī ngn/clips/Audio_Ngen_2019_2020_2021/31122019/809untitled.wav S guŋ tizi guŋ tizi guŋ tizi gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/31122019/809untitled.wav
21
+ 596 1124.0 2020-01-03 Nous baillons bè lɛ́wóbòlà ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1124untitled.wav S be lɛwobola be lɛwobola be lɛwobola gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1124untitled.wav
22
+ 873 1545.0 2020-01-07 Tu nous chatoille Ye̋ bá nɛ̀kéŋ́ lȁ ngn/clips/Audio_Ngen_2019_2020_2021/07012020/1545untitled.wav S Ye ba nɛkeŋ la Ye ba nɛkeŋ la Ye ba nɛkeŋ la gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/07012020/1545untitled.wav
23
+ 817 1462.0 2020-01-06 2 Poitrine dìŋ gɔ́ pàlàŋ ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1462untitled.wav S diŋ gɔ palaŋ diŋ gɔ palaŋ diŋ gɔ palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1462untitled.wav
24
+ 816 1461.0 2020-01-06 2 Marmite da yoro palaŋ ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1461untitled.wav S da yoro palaŋ da yoro palaŋ da yoro palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1461untitled.wav
25
+ 1121 1855.0 2020-10-10 Ils ne Wò bà wó lé pé ɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1855_10012020.wav S Wo ba wo le pe ɔ Wo ba wo le pe ɔ Wo ba wo le pe ɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1855_10012020.wav
26
+ 410 885.0 2020-01-02 1 âne súfále̋ dó ngn/clips/Audio_Ngen_2019_2020_2021/02012020/885_02012020.wav S sufale do sufale do sufale do gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/885_02012020.wav
27
+ 277 702.0 2019-12-31 Nos bá tɛ̋ sìènȕ ngn/clips/Audio_Ngen_2019_2020_2021/31122019/702untitled.wav S ba tɛ sienu ba tɛ sienu ba tɛ sienu gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/31122019/702untitled.wav
28
+ 792 1434.0 2020-01-06 Poitrine dà gɔ́ ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1434untitled.wav S da gɔ da gɔ da gɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1434untitled.wav
29
+ 1085 1816.0 2020-01-09 bon gíɛ̀ ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1816untitled.wav S giɛ giɛ giɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1816untitled.wav
30
+ 949 1633.0 2020-01-08 Ils n'ont pas wa̋ nù pálá ɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1633untitled.wav S wa nu pala ɔ wa nu pala ɔ wa nu pala ɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1633untitled.wav
31
+ 51 113.0 2019-12-26 Je vois 5 chiens ŋ̋ gɛ́ŋ́ sɔ̋ŋ̋ yé ngn/clips/Audio_Ngen_2019_2020_2021/26122019/113.wav S ŋ gɛŋ sɔŋ ye ŋ gɛŋ sɔŋ ye ŋ gɛŋ sɔŋ ye gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/26122019/113.wav
32
+ 196 396.0 2019-12-28 Je vois 9 bouteils de alcool ̀ŋ̋ yò ꜝti̋zì yè ngn/clips/Audio_Ngen_2019_2020_2021/26122019/396untitled.wav S ŋ yo ꜝtizi ye ŋ yo ꜝtizi ye ŋ yo ꜝtizi ye gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/26122019/396untitled.wav
33
+ 370 837.0 2020-01-02 veine bàní ngn/clips/Audio_Ngen_2019_2020_2021/02012020/837_02012020.wav S bani bani bani gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/837_02012020.wav
34
+ 70 144.0 2019-12-26 1 chat jāŋúmá dó ngn/clips/Audio_Ngen_2019_2020_2021/26122019/144.wav S jaŋuma do jaŋuma do jaŋuma do gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/26122019/144.wav
35
+ 265 680.0 2019-12-30 Leur wò gɔ̋ ngn/clips/Audio_Ngen_2019_2020_2021/30122019/680untitled.wav S wo gɔ wo gɔ wo gɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/30122019/680untitled.wav
36
+ 1152 1886.0 2020-10-10 Je ne vais pas seche ma wɛ lɛ pia ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1886_10012020.wav S ma wɛ lɛ pia ma wɛ lɛ pia ma wɛ lɛ pia gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1886_10012020.wav
37
+ 1067 1797.0 2020-01-09 embrasser chènwódȁlè ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1797untitled.wav S chenwodale chenwodale chenwodale gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1797untitled.wav
38
+ 1105 1838.0 2020-10-10 La poule ne va pas les pique Mɛnɛnɛ ba wo pa a ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1838_10012020.wav S Mɛnɛnɛ ba wo pa a Mɛnɛnɛ ba wo pa a Mɛnɛnɛ ba wo pa a gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1838_10012020.wav
39
+ 707 1250.0 2020-01-04 10 bù ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1250untitled.wav S bu bu bu gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1250untitled.wav
40
+ 374 841.0 2020-01-02 écureuil baŋbo+G829:G841 ngn/clips/Audio_Ngen_2019_2020_2021/02012020/841_02012020.wav S baŋbo+G829:G841 baŋbo+G829:G841 baŋbo+G829:G841 gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/841_02012020.wav
41
+ 710 1263.0 2020-01-04 20 gő bùá pàlàŋ ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1263untitled.wav S go bua palaŋ go bua palaŋ go bua palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1263untitled.wav
42
+ 184 383.0 2019-12-28 Je vois 7 termites ŋ̋ dő síénú yè ngn/clips/Audio_Ngen_2019_2020_2021/26122019/383untitled.wav S ŋ do sienu ye ŋ do sienu ye ŋ do sienu ye gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/26122019/383untitled.wav
43
+ 423 941.0 2020-01-02 Ils Wő yì lȁ ngn/clips/Audio_Ngen_2019_2020_2021/02012020/941_02012020.wav S Wo yi la Wo yi la Wo yi la gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/941_02012020.wav
44
+ 208 445.0 2019-12-28 Il y a 1 cobra dò dó be̋ nɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/28122019/445untitled.wav S do do be nɔ do do be nɔ do do be nɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/28122019/445untitled.wav
45
+ 275 700.0 2019-12-31 Tes í tɛ̋ sìènȕ ngn/clips/Audio_Ngen_2019_2020_2021/31122019/700untitled.wav S i tɛ sienu i tɛ sienu i tɛ sienu gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/31122019/700untitled.wav
46
+ 198 398.0 2019-12-28 Je vois 2 cobra ŋ̋ dő pàlàŋ yè ngn/clips/Audio_Ngen_2019_2020_2021/26122019/398untitled.wav S ŋ do palaŋ ye ŋ do palaŋ ye ŋ do palaŋ ye gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/26122019/398untitled.wav
47
+ 924 1607.0 2020-01-08 Vous ne boiviez pas Kɛ ka yi mi ɔ ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1607untitled.wav S Kɛ ka yi mi ɔ Kɛ ka yi mi ɔ Kɛ ka yi mi ɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1607untitled.wav
48
+ 912 1595.0 2020-01-08 Vous n'avez pas Ka yi mi ɔ ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1595untitled.wav S Ka yi mi ɔ Ka yi mi ɔ Ka yi mi ɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1595untitled.wav
49
+ 918 1601.0 2020-01-08 Vous boiviez Kɛ ka yi mi ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1601untitled.wav S Kɛ ka yi mi Kɛ ka yi mi Kɛ ka yi mi gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1601untitled.wav
50
+ 239 654.0 2019-12-30 Notre bá yórȍ или bá yòrȍ ngn/clips/Audio_Ngen_2019_2020_2021/30122019/654untitled.wav S ba yoro или ba yoro ba yoro или ba yoro ba yoro или ba yoro gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/30122019/654untitled.wav
51
+ 432 950.0 2020-01-02 Il a A̋ yȉ ngn/clips/Audio_Ngen_2019_2020_2021/02012020/950_02012020.wav S A yi A yi A yi gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/950_02012020.wav
52
+ 859 1527.0 2020-01-07 Swale s'est habitue à la tabac Tawalɛ e Sualuo ɲɔ̀dɔ̀lɛ̀ ngn/clips/Audio_Ngen_2019_2020_2021/07012020/1526untitled.wav S Tawalɛ e Sualuo ɲɔdɔlɛ Tawalɛ e Sualuo ɲɔdɔlɛ Tawalɛ e Sualuo ɲɔdɔlɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/07012020/1526untitled.wav
53
+ 594 1122.0 2020-01-03 Tu baille yé lɛ́wóbòlà ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1122untitled.wav S ye lɛwobola ye lɛwobola ye lɛwobola gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1122untitled.wav
54
+ 631 1159.0 2020-01-03 J'envoye Nu na ka gbagbɛ ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1159untitled.wav S Nu na ka gbagbɛ Nu na ka gbagbɛ Nu na ka gbagbɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1159untitled.wav
55
+ 620 1148.0 2020-01-03 T'envoye A nu na azoŋ ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1148untitled.wav S A nu na azoŋ A nu na azoŋ A nu na azoŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1148untitled.wav
56
+ 745 1326.0 ȁkɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1326untitled.wav S akɔ akɔ akɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1326untitled.wav
57
+ 1032 1753.0 2020-01-09 ici je habite ici nɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1753untitled.wav S nɔ nɔ nɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1753untitled.wav
58
+ 990 1682.0 2020-01-08 Ils ne tuent (en train de) Wò ba̋ dő nȍ zɛ̀ ɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1682untitled.wav S Wo ba do no zɛ ɔ Wo ba do no zɛ ɔ Wo ba do no zɛ ɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1682untitled.wav
59
+ 605 1133.0 2020-01-03 Vous allez kà lő màlà ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1133untitled.wav S ka lo mala ka lo mala ka lo mala gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1133untitled.wav
60
+ 428 946.0 2020-01-02 Vous allez Kà nűma̋ yȉ gbɛ̀ ngn/clips/Audio_Ngen_2019_2020_2021/02012020/946_02012020.wav S Ka numa yi gbɛ Ka numa yi gbɛ Ka numa yi gbɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/946_02012020.wav
61
+ 969 1654.0 2020-01-08 Tu ne tues Ì bá dő zɛ̀ ɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1654untitled.wav S I ba do zɛ ɔ I ba do zɛ ɔ I ba do zɛ ɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1654untitled.wav
62
+ 693 1236.0 2020-01-04 8 pálá sɔ́ŋ̀bȁ ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1236untitled.wav S pala sɔŋba pala sɔŋba pala sɔŋba gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1236untitled.wav
63
+ 371 838.0 2020-01-02 aisselle bànnṵ̀lá̰ ngn/clips/Audio_Ngen_2019_2020_2021/02012020/838_02012020.wav S bannula bannula bannula gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/838_02012020.wav
64
+ 367 833.0 2020-01-02 toucher ba̋lɛ̋àmàn ngn/clips/Audio_Ngen_2019_2020_2021/02012020/833_02012020.wav S balɛaman balɛaman balɛaman gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/833_02012020.wav
65
+ 588 1116.0 2020-01-03 repondre lɛ́sḭ́lɛ́ ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1116untitled.wav S lɛsilɛ lɛsilɛ lɛsilɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1116untitled.wav
66
+ 353 813.0 2019-12-31 2 cameleon dódóhőlɛ̋ páláŋ ngn/clips/Audio_Ngen_2019_2020_2021/31122019/813untitled.wav S dodoholɛ palaŋ dodoholɛ palaŋ dodoholɛ palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/31122019/813untitled.wav
67
+ 778 1417.0 2020-01-05 gauyave agoya ngn/clips/Audio_Ngen_2019_2020_2021/05012020/1417untitled.wav S agoya agoya agoya gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/05012020/1417untitled.wav
68
+ 425 943.0 2020-01-02 Tu vais Ì nűma̋ yȉ gbɛ̀ ngn/clips/Audio_Ngen_2019_2020_2021/02012020/943_02012020.wav S I numa yi gbɛ I numa yi gbɛ I numa yi gbɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/943_02012020.wav
69
+ 1212 1965.0 2020-01-12 sasesoir trɔ́kálálɛ́ ngn/clips/Audio_Ngen_2019_2020_2021/12012020/1965untitled.wav S trɔkalalɛ trɔkalalɛ trɔkalalɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/12012020/1965untitled.wav
70
+ 624 1152.0 2020-01-03 Ils envoyent Wa nu na azoŋ ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1152untitled.wav S Wa nu na azoŋ Wa nu na azoŋ Wa nu na azoŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1152untitled.wav
71
+ 1163 1914.0 2020-01-12 2 nouriture pɔbelelɛ palaŋ ngn/clips/Audio_Ngen_2019_2020_2021/12012020/1914untitled.wav S pɔbelelɛ palaŋ pɔbelelɛ palaŋ pɔbelelɛ palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/12012020/1914untitled.wav
72
+ 855 1522.0 2020-01-07 1 coq mɛ̀lɛ̀sa̋ dó ngn/clips/Audio_Ngen_2019_2020_2021/07012020/1521untitled.wav S mɛlɛsa do mɛlɛsa do mɛlɛsa do gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/07012020/1521untitled.wav
73
+ 602 1130.0 2020-01-03 Tu vais ì lő màlà ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1130untitled.wav S i lo mala i lo mala i lo mala gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1130untitled.wav
74
+ 115 227.0 2019-12-27 jambe gba̋ ngn/clips/Audio_Ngen_2019_2020_2021/27122019/227untitled.wav S gba gba gba gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/27122019/227untitled.wav
75
+ 163 349.0 2019-12-28 1 manioc gbe̋ne̋ dó ngn/clips/Audio_Ngen_2019_2020_2021/28122019/349untitled.wav S gbene do gbene do gbene do gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/28122019/349untitled.wav
76
+ 261 676.0 2019-12-30 Ton í gɔ̋ ngn/clips/Audio_Ngen_2019_2020_2021/30122019/676untitled.wav S i gɔ i gɔ i gɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/30122019/676untitled.wav
77
+ 237 652.0 2019-12-30 Ton í yòrȍ ngn/clips/Audio_Ngen_2019_2020_2021/30122019/652untitled.wav S i yoro i yoro i yoro gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/30122019/652untitled.wav
78
+ 814 1457.0 2020-01-06 2 Marmite gu yoro palaŋ ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1457untitled.wav S gu yoro palaŋ gu yoro palaŋ gu yoro palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1457untitled.wav
79
+ 575 1103.0 2020-01-03 feille láŋ́ ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1103untitled.wav S laŋ laŋ laŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1103untitled.wav
80
+ 358 819.0 2019-12-31 2 poumons pùlù páláŋ ngn/clips/Audio_Ngen_2019_2020_2021/31122019/819untitled.wav S pulu palaŋ pulu palaŋ pulu palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/31122019/819untitled.wav
81
+ 233 648.0 2019-12-30 Nos bá yó pàlàŋ ngn/clips/Audio_Ngen_2019_2020_2021/30122019/648untitled.wav S ba yo palaŋ ba yo palaŋ ba yo palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/30122019/648untitled.wav
82
+ 597 1125.0 2020-01-03 Vous baillez kè lɛ́wóbòlà ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1125untitled.wav S ke lɛwobola ke lɛwobola ke lɛwobola gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1125untitled.wav
83
+ 435 953.0 2020-01-02 Ils ont Wa̋ yȉ ngn/clips/Audio_Ngen_2019_2020_2021/02012020/953_02012020.wav S Wa yi Wa yi Wa yi gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/953_02012020.wav
84
+ 1006 1707.0 2020-01-08 Je vais (ŋo) nő málá pɔ̀ɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1707untitled.wav S (ŋo) no mala pɔɔ (ŋo) no mala pɔɔ (ŋo) no mala pɔɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1707untitled.wav
85
+ 1140 1874.0 2020-10-10 Ils Wó la̋ ?á pèŋ̀ gbɛ̏ ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1874_10012020.wav S Wo la ?a peŋ gbɛ Wo la ?a peŋ gbɛ Wo la ?a peŋ gbɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1874_10012020.wav
86
+ 1072 1802.0 2020-01-09 déchirer chi̋chi̋nɛ̀ ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1802untitled.wav S chichinɛ chichinɛ chichinɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1802untitled.wav
87
+ 785 1427.0 2020-01-06 Alcool gɔ̀ŋ yȍ ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1427untitled.wav S gɔŋ yo gɔŋ yo gɔŋ yo gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1427untitled.wav
88
+ 1240 1997.0 2022-01-30 L'anne sepasse j'ai parle jɛ́ gbi̋ŋ̋ zírȉ ŋ́ wélé pè ngn/clips/Audio_Ngen_2019_2020_2021/12012020/1997untitled.wav C:/Users/Admin/Desktop/Ngen/Audio+previous_data/Audio_Ngen_2019_2020_2021/2022/30012022/220130-033.wav S jɛ gbiŋ ziri ŋ wele pe jɛ gbiŋ ziri ŋ wele pe jɛ gbiŋ ziri ŋ wele pe gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/12012020/1997untitled.wav
89
+ 664 1201.0 2020-01-04 3 fɛ̏ ȁkɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1201untitled.wav S fɛ akɔ fɛ akɔ fɛ akɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1201untitled.wav
90
+ 579 1107.0 2020-01-03 mussulman làpúlúsílé ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1107untitled.wav S lapulusile lapulusile lapulusile gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1107untitled.wav
91
+ 722 1279.0 2020-01-04 100 gȁlȁ hàdò ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1279untitled.wav S gala hado gala hado gala hado gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1279untitled.wav
92
+ 1022 1735.0 2020-01-08 Il vait é lő ma̋ pɔ̀ɔ́ ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1735untitled.wav S e lo ma pɔɔ e lo ma pɔɔ e lo ma pɔɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1735untitled.wav
93
+ 1148 1882.0 2020-10-10 Nous sechons bȅ wɛ̋ lɛ̏ píà là ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1882_10012020.wav S be wɛ lɛ pia la be wɛ lɛ pia la be wɛ lɛ pia la gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1882_10012020.wav
94
+ 493 1011.0 2020-01-02 Tu ne ì bá yɔ́ mḭ̏ wɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/02012020/1011_02012020.wav S i ba yɔ mi wɔ i ba yɔ mi wɔ i ba yɔ mi wɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/1011_02012020.wav
95
+ 1004 1705.0 2020-01-08 Vous ne tuiriez pas Ká bá dó lɛ̏ zɛ̀ ɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1705untitled.wav S Ka ba do lɛ zɛ ɔ Ka ba do lɛ zɛ ɔ Ka ba do lɛ zɛ ɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1705untitled.wav
96
+ 168 357.0 2019-12-28 probleme we̋le̋ ngn/clips/Audio_Ngen_2019_2020_2021/28122019/357untitled.wav S wele wele wele gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/28122019/357untitled.wav
97
+ 820 1465.0 2020-01-06 2 Marmite dìŋ yòrò pàlàŋ ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1465untitled.wav S diŋ yoro palaŋ diŋ yoro palaŋ diŋ yoro palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1465untitled.wav
98
+ 614 1142.0 2020-01-03 T'envoye A nu na azoŋ ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1142untitled.wav S A nu na azoŋ A nu na azoŋ A nu na azoŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1142untitled.wav
99
+ 1095 1828.0 2020-10-10 Tu Ye butɛlilɛ pala ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1828_10012020.wav S Ye butɛlilɛ pala Ye butɛlilɛ pala Ye butɛlilɛ pala gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1828_10012020.wav
100
+ 622 1150.0 2020-01-03 Nous envoyons Ba nu na azoŋ ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1150untitled.wav S Ba nu na azoŋ Ba nu na azoŋ Ba nu na azoŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1150untitled.wav
101
+ 968 1653.0 2020-01-08 Je ne tue Má dő zɛ̀ ɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1653untitled.wav S Ma do zɛ ɔ Ma do zɛ ɔ Ma do zɛ ɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1653untitled.wav
102
+ 938 1621.0 2020-01-08 J'ai ŋ̋ nù pálá ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1621untitled.wav S ŋ nu pala ŋ nu pala ŋ nu pala gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1621untitled.wav
103
+ 453 971.0 2020-01-02 Ils n'ont pas Wá yí ɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/02012020/971_02012020.wav S Wa yi ɔ Wa yi ɔ Wa yi ɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/971_02012020.wav
104
+ 939 1622.0 2020-01-08 Tu as ya̋ nù pálá ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1622untitled.wav S ya nu pala ya nu pala ya nu pala gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1622untitled.wav
105
+ 297 736.0 2019-12-31 1 gbɔ̏n dó ngn/clips/Audio_Ngen_2019_2020_2021/30122019/736untitled.wav S gbɔn do gbɔn do gbɔn do gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/30122019/736untitled.wav
106
+ 1053 1779.0 2020-01-09 lait (du moutons etc) nɔ́nɔ́ ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1779untitled.wav S nɔnɔ nɔnɔ nɔnɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1779untitled.wav
107
+ 107 204.0 2019-12-27 7 poitrine gɔ̋ sìènȕ ngn/clips/Audio_Ngen_2019_2020_2021/27122019/204untitled.wav S gɔ sienu gɔ sienu gɔ sienu gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/27122019/204untitled.wav
108
+ 705 1248.0 2020-01-04 10 bú ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1248untitled.wav S bu bu bu gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1248untitled.wav
109
+ 482 1000.0 2020-01-02 Il va è lőma̋ yɔ̏ mḭ́ gbɛ̏ ngn/clips/Audio_Ngen_2019_2020_2021/02012020/1000_02012020.wav S e loma yɔ mi gbɛ e loma yɔ mi gbɛ e loma yɔ mi gbɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/1000_02012020.wav
110
+ 1051 1777.0 2020-01-09 2 pluie ɲɔ̏ pàlàŋ ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1777untitled.wav S ɲɔ palaŋ ɲɔ palaŋ ɲɔ palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1777untitled.wav
111
+ 915 1598.0 2020-01-08 Tu boivais Kɛ i yi mi ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1598untitled.wav S Kɛ i yi mi Kɛ i yi mi Kɛ i yi mi gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1598untitled.wav
112
+ 382 849.0 2020-01-02 unir bɛŋɛ ngn/clips/Audio_Ngen_2019_2020_2021/02012020/849_02012020.wav S bɛŋɛ bɛŋɛ bɛŋɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/849_02012020.wav
113
+ 573 1101.0 2020-01-03 diminuer lábúlɛ́ ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1101untitled.wav S labulɛ labulɛ labulɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1101untitled.wav
114
+ 361 824.0 2020-01-02 panier go ngn/clips/Audio_Ngen_2019_2020_2021/31122019/824untitled.wav S go go go gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/31122019/824untitled.wav
115
+ 351 810.0 2019-12-31 fūfūū (léger) fófó ngn/clips/Audio_Ngen_2019_2020_2021/31122019/810untitled.wav S fofo fofo fofo gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/31122019/810untitled.wav
116
+ 1124 1858.0 2020-10-10 Tu as yà wé lé pè ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1858_10012020.wav S ya we le pe ya we le pe ya we le pe gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1858_10012020.wav
117
+ 365 828.0 2020-01-02 2 cochons gbőŋ̋ palaŋ ngn/clips/Audio_Ngen_2019_2020_2021/02012020/828_02012020.wav S gboŋ palaŋ gboŋ palaŋ gboŋ palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/828_02012020.wav
118
+ 953 1637.0 2020-01-08 Nous allons Bá nű mà pálá ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1637untitled.wav S Ba nu ma pala Ba nu ma pala Ba nu ma pala gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1637untitled.wav
119
+ 346 805.0 2019-12-31 2 formi magnan zúrúŋ́ páláŋ ngn/clips/Audio_Ngen_2019_2020_2021/31122019/805untitled.wav S zuruŋ palaŋ zuruŋ palaŋ zuruŋ palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/31122019/805untitled.wav
120
+ 111 214.0 2019-12-27 2 noix de kola gɔ́lí pàlàŋ ngn/clips/Audio_Ngen_2019_2020_2021/27122019/214untitled.wav S gɔli palaŋ gɔli palaŋ gɔli palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/27122019/214untitled.wav
121
+ 691 1234.0 2020-01-04 8 dó sɔ́ŋ̀bȁ ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1234untitled.wav S do sɔŋba do sɔŋba do sɔŋba gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1234untitled.wav
122
+ 289 714.0 2019-12-31 Nos bá dó sìènȕ ngn/clips/Audio_Ngen_2019_2020_2021/30122019/714untitled.wav S ba do sienu ba do sienu ba do sienu gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/30122019/714untitled.wav
123
+ 882 1556.0 2020-01-07 Il ne É bá nűmà pálà ngn/clips/Audio_Ngen_2019_2020_2021/07012020/1556untitled.wav S E ba numa pala E ba numa pala E ba numa pala gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/07012020/1556untitled.wav
124
+ 578 1106.0 2020-01-03 ciel làpúlú ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1106untitled.wav S lapulu lapulu lapulu gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1106untitled.wav
125
+ 175 367.0 2019-12-28 9 lions jàra̋ ti̋zì ngn/clips/Audio_Ngen_2019_2020_2021/28122019/367untitled.wav S jara tizi jara tizi jara tizi gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/28122019/367untitled.wav
126
+ 240 655.0 2019-12-30 Votre ká yórȍ ngn/clips/Audio_Ngen_2019_2020_2021/30122019/655untitled.wav S ka yoro ka yoro ka yoro gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/30122019/655untitled.wav
127
+ 828 1475.0 2020-01-06 Le termite est dó lɛ tɛ̰ɛ̰ ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1475untitled.wav S do lɛ tɛɛ do lɛ tɛɛ do lɛ tɛɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1475untitled.wav
128
+ 934 1617.0 2020-01-08 Il ne Kɛ e ba yi la a ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1617untitled.wav S Kɛ e ba yi la a Kɛ e ba yi la a Kɛ e ba yi la a gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1617untitled.wav
129
+ 796 1438.0 2020-01-06 Poitrine dìŋ gɔ́ ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1438untitled.wav S diŋ gɔ diŋ gɔ diŋ gɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1438untitled.wav
130
+ 1213 1966.0 2020-01-12 en bas trɔ́má ngn/clips/Audio_Ngen_2019_2020_2021/12012020/1966untitled.wav S trɔma trɔma trɔma gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/12012020/1966untitled.wav
131
+ 1101 1834.0 2020-10-10 La poule l'a piquer Mɛnɛnɛ a pa ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1834_10012020.wav S Mɛnɛnɛ a pa Mɛnɛnɛ a pa Mɛnɛnɛ a pa gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1834_10012020.wav
132
+ 1098 1831.0 2020-10-10 La poule les a piquer Mɛnɛnɛ a wo pa ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1831_10012020.wav S Mɛnɛnɛ a wo pa Mɛnɛnɛ a wo pa Mɛnɛnɛ a wo pa gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1831_10012020.wav
133
+ 670 1213.0 2020-01-04 5 gɔ̋ sɔ̋ŋ̋ ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1213untitled.wav S gɔ sɔŋ gɔ sɔŋ gɔ sɔŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1213untitled.wav
134
+ 679 1222.0 2020-01-04 6 dő sɔ̋dù ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1222untitled.wav S do sɔdu do sɔdu do sɔdu gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1222untitled.wav
135
+ 1211 1963.0 2020-01-12 igname trɛ́ ngn/clips/Audio_Ngen_2019_2020_2021/12012020/1963untitled.wav S trɛ trɛ trɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/12012020/1963untitled.wav
136
+ 271 690.0 2019-12-31 Nos ka̋ yő sienȕ ngn/clips/Audio_Ngen_2019_2020_2021/31122019/690untitled.wav S ka yo sienu ka yo sienu ka yo sienu gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/31122019/690untitled.wav
137
+ 1115 1849.0 2020-10-10 Tu as Ya ye sa ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1849_10012020.wav S Ya ye sa Ya ye sa Ya ye sa gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1849_10012020.wav
138
+ 1197 1949.0 2020-01-12 poivre séwé ngn/clips/Audio_Ngen_2019_2020_2021/12012020/1949untitled.wav S sewe sewe sewe gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/12012020/1949untitled.wav
139
+ 56 118.0 2019-12-26 Je vois 5 alcool ŋ̋ yó sɔ̋ŋ̋ yè ngn/clips/Audio_Ngen_2019_2020_2021/26122019/118.wav S ŋ yo sɔŋ ye ŋ yo sɔŋ ye ŋ yo sɔŋ ye gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/26122019/118.wav
140
+ 886 1561.0 2020-01-08 Je mé yí mì là ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1561untitled.wav S me yi mi la me yi mi la me yi mi la gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1561untitled.wav
141
+ 621 1149.0 2020-01-03 Il envoye A nu na azoŋ ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1149untitled.wav S A nu na azoŋ A nu na azoŋ A nu na azoŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1149untitled.wav
142
+ 1300 2066.0 2020-01-14 Je vois une petite femme (о росте) ŋ dro(personne petit) liŋ siŋ do ye ngn/clips/Audio_Ngen_2019_2020_2021/14012020/2066untitled.wav S ŋ dro(personne petit) liŋ siŋ do ye ŋ dro(personne petit) liŋ siŋ do ye ŋ dro(personne petit) liŋ siŋ do ye gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/14012020/2066untitled.wav
143
+ 695 1238.0 2020-01-04 8 wɛ̀ sɔ̀ŋ̏bȁ ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1238untitled.wav S wɛ sɔŋba wɛ sɔŋba wɛ sɔŋba gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1238untitled.wav
144
+ 1292 2058.0 2020-01-14 Je vois 5 gazelle ŋ́ bɛ́ŋ̏ sɔ̀ŋ̀ yè или ŋ̋ bɛ́ŋ̏ sɔ̀ŋ̀ yè ngn/clips/Audio_Ngen_2019_2020_2021/14012020/2058untitled.wav S ŋ bɛŋ sɔŋ ye или ŋ bɛŋ sɔŋ ye ŋ bɛŋ sɔŋ ye или ŋ bɛŋ sɔŋ ye ŋ bɛŋ sɔŋ ye или ŋ bɛŋ sɔŋ ye gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/14012020/2058untitled.wav
145
+ 590 1118.0 2020-01-03 offencer lɛ́wéwèlɛ̀ ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1118untitled.wav S lɛwewelɛ lɛwewelɛ lɛwewelɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1118untitled.wav
146
+ 874 1546.0 2020-01-07 Tu les chatoille Yè wő nɛ̀kéŋ́ lȁ ngn/clips/Audio_Ngen_2019_2020_2021/07012020/1546untitled.wav S Ye wo nɛkeŋ la Ye wo nɛkeŋ la Ye wo nɛkeŋ la gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/07012020/1546untitled.wav
147
+ 123 250.0 2019-12-27 7 miel gbɔ̰́ sìènȕ ngn/clips/Audio_Ngen_2019_2020_2021/27122019/250untitled.wav S gbɔ sienu gbɔ sienu gbɔ sienu gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/27122019/250untitled.wav
148
+ 270 686.0 2019-12-31 Leurs wò gő pàlàŋ ngn/clips/Audio_Ngen_2019_2020_2021/31122019/686untitled.wav S wo go palaŋ wo go palaŋ wo go palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/31122019/686untitled.wav
149
+ 309 754.0 2019-12-30 2 chanson lɛ̀lɛ̀ŋ pàlàŋ ngn/clips/Audio_Ngen_2019_2020_2021/30122019/754untitled.wav S lɛlɛŋ palaŋ lɛlɛŋ palaŋ lɛlɛŋ palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/30122019/754untitled.wav
150
+ 884 1558.0 2020-01-07 Vous ne Ká bá nűmà pálà ngn/clips/Audio_Ngen_2019_2020_2021/07012020/1558untitled.wav S Ka ba numa pala Ka ba numa pala Ka ba numa pala gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/07012020/1558untitled.wav
151
+ 1065 1795.0 2020-01-09 année chɛ̏ ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1795untitled.wav S chɛ chɛ chɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1795untitled.wav
152
+ 591 1119.0 2020-01-03 s'etonner lɛ́wílɛ́ ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1119untitled.wav S lɛwilɛ lɛwilɛ lɛwilɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1119untitled.wav
153
+ 427 945.0 2020-01-02 Nous allons Bà nűma̋ yȉ gbɛ̀ ngn/clips/Audio_Ngen_2019_2020_2021/02012020/945_02012020.wav S Ba numa yi gbɛ Ba numa yi gbɛ Ba numa yi gbɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/945_02012020.wav
154
+ 650 1179.0 2020-01-03 Marmite de femme Lìŋ̀ yòrȍ ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1179untitled.wav S Liŋ yoro Liŋ yoro Liŋ yoro gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1179untitled.wav
155
+ 113 216.0 2019-12-27 9 noix de kola gɔ́lí ti̋zì ngn/clips/Audio_Ngen_2019_2020_2021/27122019/216untitled.wav S gɔli tizi gɔli tizi gɔli tizi gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/27122019/216untitled.wav
156
+ 793 1435.0 2020-01-06 Alcool dà yȍ или dá yȍ ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1435untitled.wav S da yo или da yo da yo или da yo da yo или da yo gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1435untitled.wav
157
+ 174 366.0 2019-12-28 9 os wɛ̋lɛ̋ ti̋zì ngn/clips/Audio_Ngen_2019_2020_2021/28122019/366untitled.wav S wɛlɛ tizi wɛlɛ tizi wɛlɛ tizi gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/28122019/366untitled.wav
158
+ 430 948.0 2020-01-02 J'ai ŋ̋ yȉ ngn/clips/Audio_Ngen_2019_2020_2021/02012020/948_02012020.wav S ŋ yi ŋ yi ŋ yi gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/948_02012020.wav
159
+ 1112 1845.0 2020-10-10 Couvette est plein pɛ́ŋ́ nɛ́ palɛ ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1845_10012020.wav S pɛŋ nɛ palɛ pɛŋ nɛ palɛ pɛŋ nɛ palɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1845_10012020.wav
160
+ 1035 1756.0 2020-01-09 en haut núŋ̋ ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1756untitled.wav S nuŋ nuŋ nuŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1756untitled.wav
161
+ 1280 2038.0 2020-01-13 mentir kíná-yɛ̀lɛ̀ ngn/clips/Audio_Ngen_2019_2020_2021/13012020/2038untitled.wav S kina-yɛlɛ kina-yɛlɛ kina-yɛlɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/13012020/2038untitled.wav
162
+ 31 75.0 2019-12-25 7 tortues zàpli̋ sìènȕ ngn/clips/Audio_Ngen_2019_2020_2021/25122019/029_25122019.wav S zapli sienu zapli sienu zapli sienu gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/25122019/029_25122019.wav
163
+ 210 448.0 2019-12-28 Il y a 9 cobra dò ti̋zì be̋ nɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/28122019/448untitled.wav S do tizi be nɔ do tizi be nɔ do tizi be nɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/28122019/448untitled.wav
164
+ 715 1269.0 2020-01-04 20 gàlà bùá pàlàŋ ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1269untitled.wav S gala bua palaŋ gala bua palaŋ gala bua palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1269untitled.wav
165
+ 354 814.0 2019-12-31 herbe pùpű ngn/clips/Audio_Ngen_2019_2020_2021/31122019/814untitled.wav S pupu pupu pupu gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/31122019/814untitled.wav
166
+ 188 387.0 2020-01-02 Je vois 7 cadavre ŋ̋ gáé sìènȕ yè ngn/clips/Audio_Ngen_2019_2020_2021/26122019/387untitled.wav S ŋ gae sienu ye ŋ gae sienu ye ŋ gae sienu ye gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/26122019/387untitled.wav
167
+ 363 826.0 2020-01-02 1 route tɛ̋ dó ngn/clips/Audio_Ngen_2019_2020_2021/02012020/826_02012020.wav S tɛ do tɛ do tɛ do gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/826_02012020.wav
168
+ 668 1211.0 2020-01-04 4 Реализация ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1211untitled.wav S Реализация Реализация Реализация gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1211untitled.wav
filtered_train_dataset.tsv ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e75143472ee7b2f1a28b53c6ea1f3ad2d4af3c3b2df4b8151cef597d92a94ed2
3
  size 3858906296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0357c29f6b59309bddd22b7893835b5af978ca5a7075a63281e81d95103265a7
3
  size 3858906296
runs/Dec10_17-36-35_instance-20241206-091824/events.out.tfevents.1733852230.instance-20241206-091824 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38b463fbdda649426fb1469c61f2344ee54408247645ebe28b422094ce5a5fca
3
+ size 6473
runs/Dec10_17-40-16_instance-20241206-091824/events.out.tfevents.1733852448.instance-20241206-091824 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ffd2e88eadaf842d353af6af860e62a0822fde31ed4366fb8bf02d4398d142c
3
+ size 6473
runs/Dec11_07-51-40_instance-20241206-091824/events.out.tfevents.1733903600.instance-20241206-091824 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0e760a2678c9e6bb159861cdc1b31b9e9cfe8aeb7c36735e2fb7e03dedc7bf5
3
+ size 8475
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b1c8d792f1585e4823062e524bee00880167bce12c3af21830f36ede8efa87f
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e58c540dcc8c04048209e1f06263dc4526ae53030cc4f3e0a38a278e3ed53001
3
  size 5368