Training in progress, step 84
Browse files- .ipynb_checkpoints/Untitled-checkpoint.ipynb +282 -0
- .ipynb_checkpoints/filtered_train_dataset-checkpoint.tsv +0 -0
- Untitled.ipynb +332 -0
- filtered_test_dataset.tsv +168 -0
- filtered_train_dataset.tsv +0 -0
- model.safetensors +1 -1
- runs/Dec10_17-36-35_instance-20241206-091824/events.out.tfevents.1733852230.instance-20241206-091824 +3 -0
- runs/Dec10_17-40-16_instance-20241206-091824/events.out.tfevents.1733852448.instance-20241206-091824 +3 -0
- runs/Dec11_07-51-40_instance-20241206-091824/events.out.tfevents.1733903600.instance-20241206-091824 +3 -0
- training_args.bin +1 -1
.ipynb_checkpoints/Untitled-checkpoint.ipynb
ADDED
@@ -0,0 +1,282 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": null,
|
6 |
+
"id": "de57d37b-73e7-40aa-a922-7c4a9fbc8085",
|
7 |
+
"metadata": {},
|
8 |
+
"outputs": [
|
9 |
+
{
|
10 |
+
"name": "stdout",
|
11 |
+
"output_type": "stream",
|
12 |
+
"text": [
|
13 |
+
"Обработка тренировочного датасета...\n",
|
14 |
+
"Загружаем список файлов из ngn/train_big.tsv\n",
|
15 |
+
"Проверяем размеры 1120 файлов (ищем файлы < 1.0MB)...\n",
|
16 |
+
"Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/02012020/995_02012020.wav (2.06MB > 1.0MB)\n",
|
17 |
+
"Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/26122019/110.wav, размер: 0.79MB\n",
|
18 |
+
"Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/27122019/321untitled.wav, размер: 0.43MB\n",
|
19 |
+
"Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1448untitled.wav, размер: 0.55MB\n",
|
20 |
+
"Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1866_10012020.wav, размер: 0.22MB\n",
|
21 |
+
"Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/12012020/1986untitled.wav, размер: 0.32MB\n",
|
22 |
+
"Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/30122019/658untitled.wav (1.26MB > 1.0MB)\n",
|
23 |
+
"Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1064untitled.wav (1.52MB > 1.0MB)\n",
|
24 |
+
"Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1098untitled.wav (1.09MB > 1.0MB)\n",
|
25 |
+
"Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/26122019/116.wav (1.17MB > 1.0MB)\n",
|
26 |
+
"Проверено 100/1120 файлов...\n",
|
27 |
+
"Проверено 200/1120 файлов...\n"
|
28 |
+
]
|
29 |
+
}
|
30 |
+
],
|
31 |
+
"source": [
|
32 |
+
"import os\n",
|
33 |
+
"from google.cloud import storage\n",
|
34 |
+
"import pandas as pd\n",
|
35 |
+
"from typing import Tuple, List, Dict\n",
|
36 |
+
"import io\n",
|
37 |
+
"\n",
|
38 |
+
"def check_blob_exists_and_size(bucket, possible_paths: List[str]) -> Tuple[bool, str, float]:\n",
|
39 |
+
" \"\"\"\n",
|
40 |
+
" Проверяет существование файла по всем возможным путям и его размер\n",
|
41 |
+
" Returns:\n",
|
42 |
+
" Tuple[bool, str, float]: (найден ли файл, путь к файлу, размер в MB)\n",
|
43 |
+
" \"\"\"\n",
|
44 |
+
" for path in possible_paths:\n",
|
45 |
+
" blob = bucket.blob(path)\n",
|
46 |
+
" try:\n",
|
47 |
+
" if blob.exists():\n",
|
48 |
+
" blob.reload() # Загружаем метаданные\n",
|
49 |
+
" size_mb = blob.size / (1024 * 1024)\n",
|
50 |
+
" return True, path, size_mb\n",
|
51 |
+
" except Exception as e:\n",
|
52 |
+
" print(f\"Ошибка при проверке файла {path}: {str(e)}\")\n",
|
53 |
+
" continue\n",
|
54 |
+
" return False, \"\", 0.0\n",
|
55 |
+
"\n",
|
56 |
+
"def generate_possible_paths(original_path: str, clips_prefix: str) -> List[str]:\n",
|
57 |
+
" \"\"\"\n",
|
58 |
+
" Генерирует все возможные варианты пути к файлу с учетом префикса\n",
|
59 |
+
" \"\"\"\n",
|
60 |
+
" parts = original_path.split('/')\n",
|
61 |
+
" if len(parts) < 2:\n",
|
62 |
+
" return []\n",
|
63 |
+
" current_folder = parts[-2] # получаем название текущей папки\n",
|
64 |
+
" filename = parts[-1] # получаем имя файла с расширением\n",
|
65 |
+
" base_dir = '/'.join(parts[:-1]) # получаем путь к директории\n",
|
66 |
+
"\n",
|
67 |
+
" # Разделяем имя файла на имя и расширение\n",
|
68 |
+
" name, extension = os.path.splitext(filename)\n",
|
69 |
+
" \n",
|
70 |
+
" # Разделяем имя по знаку подчеркивания и берем левую часть\n",
|
71 |
+
" base_name = name.split('_')[0]\n",
|
72 |
+
" \n",
|
73 |
+
" # Формируем все возможные варианты имен файлов\n",
|
74 |
+
" possible_names = [\n",
|
75 |
+
" f\"{base_name}{extension}\", # просто имя с расширением\n",
|
76 |
+
" f\"{base_name}_{current_folder}{extension}\", # имя + текущая папка\n",
|
77 |
+
" f\"{base_name}untitled{extension}\" # имя + untitled\n",
|
78 |
+
" ]\n",
|
79 |
+
" \n",
|
80 |
+
" # Формируем полные пути с префиксом\n",
|
81 |
+
" possible_paths = [f\"{clips_prefix}/{base_dir}/{name}\" for name in possible_names]\n",
|
82 |
+
" \n",
|
83 |
+
" return list(set(possible_paths)) # удаляем возможные дубликаты\n",
|
84 |
+
"\n",
|
85 |
+
"def check_all_files_sizes(bucket_name: str, tsv_path: str, clips_prefix: str, max_size_mb: float = 1.0) -> Dict[str, Tuple[str, float]]:\n",
|
86 |
+
" \"\"\"\n",
|
87 |
+
" Проверяет размеры всех файлов и возвращает словарь с информацией о файлах меньше max_size_mb\n",
|
88 |
+
" \"\"\"\n",
|
89 |
+
" storage_client = storage.Client()\n",
|
90 |
+
" bucket = storage_client.bucket(bucket_name)\n",
|
91 |
+
" \n",
|
92 |
+
" print(f\"Загружаем список файлов из {tsv_path}\")\n",
|
93 |
+
" blob = bucket.blob(tsv_path)\n",
|
94 |
+
" content = blob.download_as_string()\n",
|
95 |
+
" df = pd.read_csv(io.BytesIO(content), sep='\\t')\n",
|
96 |
+
" \n",
|
97 |
+
" valid_files = {}\n",
|
98 |
+
" missing_files = []\n",
|
99 |
+
" oversized_files = []\n",
|
100 |
+
" \n",
|
101 |
+
" total_files = len(df)\n",
|
102 |
+
" print(f\"Проверяем размеры {total_files} файлов (ищем файлы < {max_size_mb}MB)...\")\n",
|
103 |
+
" \n",
|
104 |
+
" # Статистика по размерам\n",
|
105 |
+
" size_stats = {\n",
|
106 |
+
" '0-0.5MB': 0,\n",
|
107 |
+
" '0.5-1MB': 0,\n",
|
108 |
+
" '1-1.5MB': 0,\n",
|
109 |
+
" '1.5-2MB': 0,\n",
|
110 |
+
" '>2MB': 0\n",
|
111 |
+
" }\n",
|
112 |
+
" \n",
|
113 |
+
" # Проверяем каждый файл\n",
|
114 |
+
" for idx, row in df.iterrows():\n",
|
115 |
+
" if 'path' not in row or pd.isna(row['path']):\n",
|
116 |
+
" continue\n",
|
117 |
+
" \n",
|
118 |
+
" original_path = row['path'].strip()\n",
|
119 |
+
" possible_paths = generate_possible_paths(original_path, clips_prefix)\n",
|
120 |
+
" exists, real_path, size_mb = check_blob_exists_and_size(bucket, possible_paths)\n",
|
121 |
+
" \n",
|
122 |
+
" if exists:\n",
|
123 |
+
" # Обновляем статистику по размерам\n",
|
124 |
+
" if size_mb > 2:\n",
|
125 |
+
" size_stats['>2MB'] += 1\n",
|
126 |
+
" elif size_mb > 1.5:\n",
|
127 |
+
" size_stats['1.5-2MB'] += 1\n",
|
128 |
+
" elif size_mb > 1:\n",
|
129 |
+
" size_stats['1-1.5MB'] += 1\n",
|
130 |
+
" elif size_mb > 0.5:\n",
|
131 |
+
" size_stats['0.5-1MB'] += 1\n",
|
132 |
+
" else:\n",
|
133 |
+
" size_stats['0-0.5MB'] += 1\n",
|
134 |
+
" \n",
|
135 |
+
" if size_mb < max_size_mb:\n",
|
136 |
+
" valid_files[original_path] = (real_path, size_mb)\n",
|
137 |
+
" if len(valid_files) <= 5:\n",
|
138 |
+
" print(f\"Подходящий файл найден: {real_path}, размер: {size_mb:.2f}MB\")\n",
|
139 |
+
" else:\n",
|
140 |
+
" oversized_files.append((original_path, size_mb))\n",
|
141 |
+
" if len(oversized_files) <= 5:\n",
|
142 |
+
" print(f\"Файл слишком большой: {real_path} ({size_mb:.2f}MB > {max_size_mb}MB)\")\n",
|
143 |
+
" else:\n",
|
144 |
+
" missing_files.append(original_path)\n",
|
145 |
+
" \n",
|
146 |
+
" if (idx + 1) % 100 == 0:\n",
|
147 |
+
" print(f\"Проверено {idx + 1}/{total_files} файлов...\")\n",
|
148 |
+
" \n",
|
149 |
+
" # Выводим статистику\n",
|
150 |
+
" print(\"\\nРезультаты проверки размеров:\")\n",
|
151 |
+
" print(f\"Всего файлов: {total_files}\")\n",
|
152 |
+
" print(f\"Файлов не найдено: {len(missing_files)}\")\n",
|
153 |
+
" print(f\"\\nРаспределение по размерам:\")\n",
|
154 |
+
" for size_range, count in size_stats.items():\n",
|
155 |
+
" print(f\"{size_range}: {count} файлов\")\n",
|
156 |
+
" \n",
|
157 |
+
" return valid_files, missing_files, oversized_files\n",
|
158 |
+
"\n",
|
159 |
+
"def create_filtered_dataset(bucket_name: str, tsv_path: str, clips_prefix: str, max_size_mb: float = 1.0) -> pd.DataFrame:\n",
|
160 |
+
" \"\"\"\n",
|
161 |
+
" Создает датафрейм только с файлами подходящего размера\n",
|
162 |
+
" \"\"\"\n",
|
163 |
+
" # Проверяем размеры всех файлов\n",
|
164 |
+
" valid_files, missing_files, oversized_files = check_all_files_sizes(\n",
|
165 |
+
" bucket_name, tsv_path, clips_prefix, max_size_mb\n",
|
166 |
+
" )\n",
|
167 |
+
" \n",
|
168 |
+
" # Загружаем исходный датафрейм\n",
|
169 |
+
" storage_client = storage.Client()\n",
|
170 |
+
" bucket = storage_client.bucket(bucket_name)\n",
|
171 |
+
" blob = bucket.blob(tsv_path)\n",
|
172 |
+
" content = blob.download_as_string()\n",
|
173 |
+
" df = pd.read_csv(io.BytesIO(content), sep='\\t')\n",
|
174 |
+
" \n",
|
175 |
+
" # Очищаем датафрейм\n",
|
176 |
+
" df = df.dropna(subset=['path', 'sentence_normalized'])\n",
|
177 |
+
" df = df[\n",
|
178 |
+
" (df['path'].str.strip() != '') & \n",
|
179 |
+
" (df['sentence_normalized'].str.strip() != '')\n",
|
180 |
+
" ]\n",
|
181 |
+
" \n",
|
182 |
+
" # Фильтруем датафрейм\n",
|
183 |
+
" filtered_df = df[df['path'].isin(valid_files.keys())].copy()\n",
|
184 |
+
" \n",
|
185 |
+
" # Обновляем пути и создаем аудио колонку\n",
|
186 |
+
" filtered_df['path'] = filtered_df['path'].apply(lambda x: valid_files[x][0])\n",
|
187 |
+
" filtered_df['audio'] = filtered_df['path'].apply(lambda x: f\"gs://{bucket_name}/{x}\")\n",
|
188 |
+
" \n",
|
189 |
+
" # Создаем тройную транскрипцию\n",
|
190 |
+
" filtered_df['sentence_normalized'] = filtered_df['sentence_normalized'].apply(\n",
|
191 |
+
" lambda x: ' '.join([x.strip()] * 3)\n",
|
192 |
+
" )\n",
|
193 |
+
" \n",
|
194 |
+
" print(f\"\\nСоздан отфильтрованный датасет с {len(filtered_df)} записями\")\n",
|
195 |
+
" return filtered_df\n",
|
196 |
+
"\n",
|
197 |
+
"def process_datasets(bucket_name: str, train_tsv: str, test_tsv: str, clips_prefix: str, max_size_mb: float = 1.0):\n",
|
198 |
+
" \"\"\"\n",
|
199 |
+
" Обрабатывает тренировочный и тестовый датасеты\n",
|
200 |
+
" \"\"\"\n",
|
201 |
+
" print(\"Обработка тренировочного датасета...\")\n",
|
202 |
+
" filtered_train_df = create_filtered_dataset(\n",
|
203 |
+
" bucket_name=bucket_name,\n",
|
204 |
+
" tsv_path=train_tsv,\n",
|
205 |
+
" clips_prefix=clips_prefix,\n",
|
206 |
+
" max_size_mb=max_size_mb\n",
|
207 |
+
" )\n",
|
208 |
+
" \n",
|
209 |
+
" print(\"\\nОбработка тестового датасета...\")\n",
|
210 |
+
" filtered_test_df = create_filtered_dataset(\n",
|
211 |
+
" bucket_name=bucket_name,\n",
|
212 |
+
" tsv_path=test_tsv,\n",
|
213 |
+
" clips_prefix=clips_prefix,\n",
|
214 |
+
" max_size_mb=max_size_mb\n",
|
215 |
+
" )\n",
|
216 |
+
" \n",
|
217 |
+
" # Сохраняем результаты\n",
|
218 |
+
" filtered_train_df.to_csv('1filtered_train_dataset.tsv', sep='\\t', index=False)\n",
|
219 |
+
" filtered_test_df.to_csv('filtered_test_dataset.tsv', sep='\\t', index=False)\n",
|
220 |
+
" \n",
|
221 |
+
" return filtered_train_df, filtered_test_df\n",
|
222 |
+
"\n",
|
223 |
+
"if __name__ == \"__main__\":\n",
|
224 |
+
" # Обрабатываем оба датасета\n",
|
225 |
+
" filtered_train_df, filtered_test_df = process_datasets(\n",
|
226 |
+
" bucket_name='ngen_model_fine_tuned',\n",
|
227 |
+
" train_tsv='ngn/train_big.tsv',\n",
|
228 |
+
" test_tsv='ngn/test_big.tsv',\n",
|
229 |
+
" clips_prefix='ngn/clips',\n",
|
230 |
+
" max_size_mb=1.0 # ограничение в 1MB\n",
|
231 |
+
" )\n",
|
232 |
+
" \n",
|
233 |
+
"# # Создаем датасеты для huggingface\n",
|
234 |
+
"# from datasets import Dataset\n",
|
235 |
+
"# from datasets.features import Audio\n",
|
236 |
+
" \n",
|
237 |
+
"# train_dataset = Dataset.from_pandas(filtered_train_df)\n",
|
238 |
+
"# test_dataset = Dataset.from_pandas(filtered_test_df)\n",
|
239 |
+
"# print(\"Созданы датасеты\")\n",
|
240 |
+
" \n",
|
241 |
+
"# train_dataset = train_dataset.cast_column(\"audio\", Audio(sampling_rate=16000))\n",
|
242 |
+
"# test_dataset = test_dataset.cast_column(\"audio\", Audio(sampling_rate=16000))\n",
|
243 |
+
"# print(\"Колонки аудио преобразованы\")"
|
244 |
+
]
|
245 |
+
},
|
246 |
+
{
|
247 |
+
"cell_type": "code",
|
248 |
+
"execution_count": null,
|
249 |
+
"id": "89853d1c-a7db-498b-bd45-bc6e72b86b4e",
|
250 |
+
"metadata": {},
|
251 |
+
"outputs": [],
|
252 |
+
"source": []
|
253 |
+
}
|
254 |
+
],
|
255 |
+
"metadata": {
|
256 |
+
"environment": {
|
257 |
+
"kernel": "conda-base-py",
|
258 |
+
"name": "workbench-notebooks.m126",
|
259 |
+
"type": "gcloud",
|
260 |
+
"uri": "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/workbench-notebooks:m126"
|
261 |
+
},
|
262 |
+
"kernelspec": {
|
263 |
+
"display_name": "Python 3 (ipykernel) (Local)",
|
264 |
+
"language": "python",
|
265 |
+
"name": "conda-base-py"
|
266 |
+
},
|
267 |
+
"language_info": {
|
268 |
+
"codemirror_mode": {
|
269 |
+
"name": "ipython",
|
270 |
+
"version": 3
|
271 |
+
},
|
272 |
+
"file_extension": ".py",
|
273 |
+
"mimetype": "text/x-python",
|
274 |
+
"name": "python",
|
275 |
+
"nbconvert_exporter": "python",
|
276 |
+
"pygments_lexer": "ipython3",
|
277 |
+
"version": "3.10.15"
|
278 |
+
}
|
279 |
+
},
|
280 |
+
"nbformat": 4,
|
281 |
+
"nbformat_minor": 5
|
282 |
+
}
|
.ipynb_checkpoints/filtered_train_dataset-checkpoint.tsv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Untitled.ipynb
ADDED
@@ -0,0 +1,332 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"id": "de57d37b-73e7-40aa-a922-7c4a9fbc8085",
|
7 |
+
"metadata": {},
|
8 |
+
"outputs": [
|
9 |
+
{
|
10 |
+
"name": "stdout",
|
11 |
+
"output_type": "stream",
|
12 |
+
"text": [
|
13 |
+
"Обработка тренировочного датасета...\n",
|
14 |
+
"Загружаем список файлов из ngn/train_big.tsv\n",
|
15 |
+
"Проверяем размеры 1120 файлов (ищем файлы < 1.0MB)...\n",
|
16 |
+
"Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/02012020/995_02012020.wav (2.06MB > 1.0MB)\n",
|
17 |
+
"Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/26122019/110.wav, размер: 0.79MB\n",
|
18 |
+
"Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/27122019/321untitled.wav, размер: 0.43MB\n",
|
19 |
+
"Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1448untitled.wav, размер: 0.55MB\n",
|
20 |
+
"Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1866_10012020.wav, размер: 0.22MB\n",
|
21 |
+
"Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/12012020/1986untitled.wav, размер: 0.32MB\n",
|
22 |
+
"Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/30122019/658untitled.wav (1.26MB > 1.0MB)\n",
|
23 |
+
"Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1064untitled.wav (1.52MB > 1.0MB)\n",
|
24 |
+
"Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1098untitled.wav (1.09MB > 1.0MB)\n",
|
25 |
+
"Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/26122019/116.wav (1.17MB > 1.0MB)\n",
|
26 |
+
"Проверено 100/1120 файлов...\n",
|
27 |
+
"Проверено 200/1120 файлов...\n",
|
28 |
+
"Проверено 400/1120 файлов...\n",
|
29 |
+
"Проверено 500/1120 файлов...\n",
|
30 |
+
"Проверено 600/1120 файлов...\n",
|
31 |
+
"Проверено 700/1120 файлов...\n",
|
32 |
+
"Проверено 800/1120 файлов...\n",
|
33 |
+
"Проверено 900/1120 файлов...\n",
|
34 |
+
"Проверено 1000/1120 файлов...\n",
|
35 |
+
"Проверено 1100/1120 файлов...\n",
|
36 |
+
"\n",
|
37 |
+
"Результаты проверки размеров:\n",
|
38 |
+
"Всего файлов: 1120\n",
|
39 |
+
"Файлов не найдено: 21\n",
|
40 |
+
"\n",
|
41 |
+
"Распределение по размерам:\n",
|
42 |
+
"0-0.5MB: 297 файлов\n",
|
43 |
+
"0.5-1MB: 364 файлов\n",
|
44 |
+
"1-1.5MB: 218 файлов\n",
|
45 |
+
"1.5-2MB: 65 файлов\n",
|
46 |
+
">2MB: 63 файлов\n",
|
47 |
+
"\n",
|
48 |
+
"Создан отфильтрованный датасет с 660 записями\n",
|
49 |
+
"\n",
|
50 |
+
"Обработка тестового датасета...\n",
|
51 |
+
"Загружаем список файлов из ngn/test_big.tsv\n",
|
52 |
+
"Проверяем размеры 281 файлов (ищем файлы < 1.0MB)...\n",
|
53 |
+
"Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1832_10012020.wav, размер: 0.23MB\n",
|
54 |
+
"Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1632untitled.wav, размер: 0.68MB\n",
|
55 |
+
"Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/26122019/111.wav, размер: 0.79MB\n",
|
56 |
+
"Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/13012020/2021untitled.wav (1.07MB > 1.0MB)\n",
|
57 |
+
"Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1249untitled.wav, размер: 0.53MB\n",
|
58 |
+
"Подходящий файл найден: ngn/clips/Audio_Ngen_2019_2020_2021/30122019/800untitled.wav, размер: 0.49MB\n",
|
59 |
+
"Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/31122019/803untitled.wav (1.39MB > 1.0MB)\n",
|
60 |
+
"Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/25122019/033_25122019.wav (2.82MB > 1.0MB)\n",
|
61 |
+
"Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/02012020/997_02012020.wav (1.15MB > 1.0MB)\n",
|
62 |
+
"Файл слишком большой: ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1455untitled.wav (1.40MB > 1.0MB)\n",
|
63 |
+
"Проверено 100/281 файлов...\n",
|
64 |
+
"Проверено 200/281 файлов...\n",
|
65 |
+
"\n",
|
66 |
+
"Результаты проверки размеров:\n",
|
67 |
+
"Всего файлов: 281\n",
|
68 |
+
"Файлов не найдено: 6\n",
|
69 |
+
"\n",
|
70 |
+
"Распределение по размерам:\n",
|
71 |
+
"0-0.5MB: 65 файлов\n",
|
72 |
+
"0.5-1MB: 102 файлов\n",
|
73 |
+
"1-1.5MB: 57 файлов\n",
|
74 |
+
"1.5-2MB: 12 файлов\n",
|
75 |
+
">2MB: 13 файлов\n",
|
76 |
+
"\n",
|
77 |
+
"Создан отфильтрованный датасет с 167 записями\n"
|
78 |
+
]
|
79 |
+
}
|
80 |
+
],
|
81 |
+
"source": [
|
82 |
+
"import os\n",
|
83 |
+
"from google.cloud import storage\n",
|
84 |
+
"import pandas as pd\n",
|
85 |
+
"from typing import Tuple, List, Dict\n",
|
86 |
+
"import io\n",
|
87 |
+
"\n",
|
88 |
+
"def check_blob_exists_and_size(bucket, possible_paths: List[str]) -> Tuple[bool, str, float]:\n",
|
89 |
+
" \"\"\"\n",
|
90 |
+
" Проверяет существование файла по всем возможным путям и его размер\n",
|
91 |
+
" Returns:\n",
|
92 |
+
" Tuple[bool, str, float]: (найден ли файл, путь к файлу, размер в MB)\n",
|
93 |
+
" \"\"\"\n",
|
94 |
+
" for path in possible_paths:\n",
|
95 |
+
" blob = bucket.blob(path)\n",
|
96 |
+
" try:\n",
|
97 |
+
" if blob.exists():\n",
|
98 |
+
" blob.reload() # Загружаем метаданные\n",
|
99 |
+
" size_mb = blob.size / (1024 * 1024)\n",
|
100 |
+
" return True, path, size_mb\n",
|
101 |
+
" except Exception as e:\n",
|
102 |
+
" print(f\"Ошибка при проверке файла {path}: {str(e)}\")\n",
|
103 |
+
" continue\n",
|
104 |
+
" return False, \"\", 0.0\n",
|
105 |
+
"\n",
|
106 |
+
"def generate_possible_paths(original_path: str, clips_prefix: str) -> List[str]:\n",
|
107 |
+
" \"\"\"\n",
|
108 |
+
" Генерирует все возможные варианты пути к файлу с учетом префикса\n",
|
109 |
+
" \"\"\"\n",
|
110 |
+
" parts = original_path.split('/')\n",
|
111 |
+
" if len(parts) < 2:\n",
|
112 |
+
" return []\n",
|
113 |
+
" current_folder = parts[-2] # получаем название текущей папки\n",
|
114 |
+
" filename = parts[-1] # получаем имя файла с расширением\n",
|
115 |
+
" base_dir = '/'.join(parts[:-1]) # получаем путь к директории\n",
|
116 |
+
"\n",
|
117 |
+
" # Разделяем имя файла на имя и расширение\n",
|
118 |
+
" name, extension = os.path.splitext(filename)\n",
|
119 |
+
" \n",
|
120 |
+
" # Разделяем имя по знаку подчеркивания и берем левую часть\n",
|
121 |
+
" base_name = name.split('_')[0]\n",
|
122 |
+
" \n",
|
123 |
+
" # Формируем все возможные варианты имен файлов\n",
|
124 |
+
" possible_names = [\n",
|
125 |
+
" f\"{base_name}{extension}\", # просто имя с расширением\n",
|
126 |
+
" f\"{base_name}_{current_folder}{extension}\", # имя + текущая папка\n",
|
127 |
+
" f\"{base_name}untitled{extension}\" # имя + untitled\n",
|
128 |
+
" ]\n",
|
129 |
+
" \n",
|
130 |
+
" # Формируем полные пути с префиксом\n",
|
131 |
+
" possible_paths = [f\"{clips_prefix}/{base_dir}/{name}\" for name in possible_names]\n",
|
132 |
+
" \n",
|
133 |
+
" return list(set(possible_paths)) # удаляем возможные дубликаты\n",
|
134 |
+
"\n",
|
135 |
+
"def check_all_files_sizes(bucket_name: str, tsv_path: str, clips_prefix: str, max_size_mb: float = 1.0) -> Dict[str, Tuple[str, float]]:\n",
|
136 |
+
" \"\"\"\n",
|
137 |
+
" Проверяет размеры всех файлов и возвращает словарь с информацией о файлах меньше max_size_mb\n",
|
138 |
+
" \"\"\"\n",
|
139 |
+
" storage_client = storage.Client()\n",
|
140 |
+
" bucket = storage_client.bucket(bucket_name)\n",
|
141 |
+
" \n",
|
142 |
+
" print(f\"Загружаем список файлов из {tsv_path}\")\n",
|
143 |
+
" blob = bucket.blob(tsv_path)\n",
|
144 |
+
" content = blob.download_as_string()\n",
|
145 |
+
" df = pd.read_csv(io.BytesIO(content), sep='\\t')\n",
|
146 |
+
" \n",
|
147 |
+
" valid_files = {}\n",
|
148 |
+
" missing_files = []\n",
|
149 |
+
" oversized_files = []\n",
|
150 |
+
" \n",
|
151 |
+
" total_files = len(df)\n",
|
152 |
+
" print(f\"Проверяем размеры {total_files} файлов (ищем файлы < {max_size_mb}MB)...\")\n",
|
153 |
+
" \n",
|
154 |
+
" # Статистика по размерам\n",
|
155 |
+
" size_stats = {\n",
|
156 |
+
" '0-0.5MB': 0,\n",
|
157 |
+
" '0.5-1MB': 0,\n",
|
158 |
+
" '1-1.5MB': 0,\n",
|
159 |
+
" '1.5-2MB': 0,\n",
|
160 |
+
" '>2MB': 0\n",
|
161 |
+
" }\n",
|
162 |
+
" \n",
|
163 |
+
" # Проверяем каждый файл\n",
|
164 |
+
" for idx, row in df.iterrows():\n",
|
165 |
+
" if 'path' not in row or pd.isna(row['path']):\n",
|
166 |
+
" continue\n",
|
167 |
+
" \n",
|
168 |
+
" original_path = row['path'].strip()\n",
|
169 |
+
" possible_paths = generate_possible_paths(original_path, clips_prefix)\n",
|
170 |
+
" exists, real_path, size_mb = check_blob_exists_and_size(bucket, possible_paths)\n",
|
171 |
+
" \n",
|
172 |
+
" if exists:\n",
|
173 |
+
" # Обновляем статистику по размерам\n",
|
174 |
+
" if size_mb > 2:\n",
|
175 |
+
" size_stats['>2MB'] += 1\n",
|
176 |
+
" elif size_mb > 1.5:\n",
|
177 |
+
" size_stats['1.5-2MB'] += 1\n",
|
178 |
+
" elif size_mb > 1:\n",
|
179 |
+
" size_stats['1-1.5MB'] += 1\n",
|
180 |
+
" elif size_mb > 0.5:\n",
|
181 |
+
" size_stats['0.5-1MB'] += 1\n",
|
182 |
+
" else:\n",
|
183 |
+
" size_stats['0-0.5MB'] += 1\n",
|
184 |
+
" \n",
|
185 |
+
" if size_mb < max_size_mb:\n",
|
186 |
+
" valid_files[original_path] = (real_path, size_mb)\n",
|
187 |
+
" if len(valid_files) <= 5:\n",
|
188 |
+
" print(f\"Подходящий файл найден: {real_path}, размер: {size_mb:.2f}MB\")\n",
|
189 |
+
" else:\n",
|
190 |
+
" oversized_files.append((original_path, size_mb))\n",
|
191 |
+
" if len(oversized_files) <= 5:\n",
|
192 |
+
" print(f\"Файл слишком большой: {real_path} ({size_mb:.2f}MB > {max_size_mb}MB)\")\n",
|
193 |
+
" else:\n",
|
194 |
+
" missing_files.append(original_path)\n",
|
195 |
+
" \n",
|
196 |
+
" if (idx + 1) % 100 == 0:\n",
|
197 |
+
" print(f\"Проверено {idx + 1}/{total_files} файлов...\")\n",
|
198 |
+
" \n",
|
199 |
+
" # Выводим статистику\n",
|
200 |
+
" print(\"\\nРезультаты проверки размеров:\")\n",
|
201 |
+
" print(f\"Всего файлов: {total_files}\")\n",
|
202 |
+
" print(f\"Файлов не найдено: {len(missing_files)}\")\n",
|
203 |
+
" print(f\"\\nРаспределение по размерам:\")\n",
|
204 |
+
" for size_range, count in size_stats.items():\n",
|
205 |
+
" print(f\"{size_range}: {count} файлов\")\n",
|
206 |
+
" \n",
|
207 |
+
" return valid_files, missing_files, oversized_files\n",
|
208 |
+
"\n",
|
209 |
+
"def create_filtered_dataset(bucket_name: str, tsv_path: str, clips_prefix: str, max_size_mb: float = 1.0) -> pd.DataFrame:\n",
|
210 |
+
" \"\"\"\n",
|
211 |
+
" Создает датафрейм только с файлами подходящего размера\n",
|
212 |
+
" \"\"\"\n",
|
213 |
+
" # Проверяем размеры всех файлов\n",
|
214 |
+
" valid_files, missing_files, oversized_files = check_all_files_sizes(\n",
|
215 |
+
" bucket_name, tsv_path, clips_prefix, max_size_mb\n",
|
216 |
+
" )\n",
|
217 |
+
" \n",
|
218 |
+
" # Загружаем исходный датафрейм\n",
|
219 |
+
" storage_client = storage.Client()\n",
|
220 |
+
" bucket = storage_client.bucket(bucket_name)\n",
|
221 |
+
" blob = bucket.blob(tsv_path)\n",
|
222 |
+
" content = blob.download_as_string()\n",
|
223 |
+
" df = pd.read_csv(io.BytesIO(content), sep='\\t')\n",
|
224 |
+
" \n",
|
225 |
+
" # Очищаем датафрейм\n",
|
226 |
+
" df = df.dropna(subset=['path', 'sentence_normalized'])\n",
|
227 |
+
" df = df[\n",
|
228 |
+
" (df['path'].str.strip() != '') & \n",
|
229 |
+
" (df['sentence_normalized'].str.strip() != '')\n",
|
230 |
+
" ]\n",
|
231 |
+
" \n",
|
232 |
+
" # Фильтруем датафрейм\n",
|
233 |
+
" filtered_df = df[df['path'].isin(valid_files.keys())].copy()\n",
|
234 |
+
" \n",
|
235 |
+
" # Обновляем пути и создаем аудио колонку\n",
|
236 |
+
" filtered_df['path'] = filtered_df['path'].apply(lambda x: valid_files[x][0])\n",
|
237 |
+
" filtered_df['audio'] = filtered_df['path'].apply(lambda x: f\"gs://{bucket_name}/{x}\")\n",
|
238 |
+
" \n",
|
239 |
+
" # Создаем тройную транскрипцию\n",
|
240 |
+
" filtered_df['sentence_normalized'] = filtered_df['sentence_normalized'].apply(\n",
|
241 |
+
" lambda x: ' '.join([x.strip()] * 3)\n",
|
242 |
+
" )\n",
|
243 |
+
" \n",
|
244 |
+
" print(f\"\\nСоздан отфильтрованный датасет с {len(filtered_df)} записями\")\n",
|
245 |
+
" return filtered_df\n",
|
246 |
+
"\n",
|
247 |
+
"def process_datasets(bucket_name: str, train_tsv: str, test_tsv: str, clips_prefix: str, max_size_mb: float = 1.0):\n",
|
248 |
+
" \"\"\"\n",
|
249 |
+
" Обрабатывает тренировочный и тестовый датасеты\n",
|
250 |
+
" \"\"\"\n",
|
251 |
+
" print(\"Обработка тренировочного датасета...\")\n",
|
252 |
+
" filtered_train_df = create_filtered_dataset(\n",
|
253 |
+
" bucket_name=bucket_name,\n",
|
254 |
+
" tsv_path=train_tsv,\n",
|
255 |
+
" clips_prefix=clips_prefix,\n",
|
256 |
+
" max_size_mb=max_size_mb\n",
|
257 |
+
" )\n",
|
258 |
+
" \n",
|
259 |
+
" print(\"\\nОбработка тестового датасета...\")\n",
|
260 |
+
" filtered_test_df = create_filtered_dataset(\n",
|
261 |
+
" bucket_name=bucket_name,\n",
|
262 |
+
" tsv_path=test_tsv,\n",
|
263 |
+
" clips_prefix=clips_prefix,\n",
|
264 |
+
" max_size_mb=max_size_mb\n",
|
265 |
+
" )\n",
|
266 |
+
" \n",
|
267 |
+
" # Сохраняем результаты\n",
|
268 |
+
" filtered_train_df.to_csv('1filtered_train_dataset.tsv', sep='\\t', index=False)\n",
|
269 |
+
" filtered_test_df.to_csv('filtered_test_dataset.tsv', sep='\\t', index=False)\n",
|
270 |
+
" \n",
|
271 |
+
" return filtered_train_df, filtered_test_df\n",
|
272 |
+
"\n",
|
273 |
+
"if __name__ == \"__main__\":\n",
|
274 |
+
" # Обрабатываем оба датасета\n",
|
275 |
+
" filtered_train_df, filtered_test_df = process_datasets(\n",
|
276 |
+
" bucket_name='ngen_model_fine_tuned',\n",
|
277 |
+
" train_tsv='ngn/train_big.tsv',\n",
|
278 |
+
" test_tsv='ngn/test_big.tsv',\n",
|
279 |
+
" clips_prefix='ngn/clips',\n",
|
280 |
+
" max_size_mb=1.0 # ограничение в 1MB\n",
|
281 |
+
" )\n",
|
282 |
+
" \n",
|
283 |
+
"# # Создаем датасеты для huggingface\n",
|
284 |
+
"# from datasets import Dataset\n",
|
285 |
+
"# from datasets.features import Audio\n",
|
286 |
+
" \n",
|
287 |
+
"# train_dataset = Dataset.from_pandas(filtered_train_df)\n",
|
288 |
+
"# test_dataset = Dataset.from_pandas(filtered_test_df)\n",
|
289 |
+
"# print(\"Созданы датасеты\")\n",
|
290 |
+
" \n",
|
291 |
+
"# train_dataset = train_dataset.cast_column(\"audio\", Audio(sampling_rate=16000))\n",
|
292 |
+
"# test_dataset = test_dataset.cast_column(\"audio\", Audio(sampling_rate=16000))\n",
|
293 |
+
"# print(\"Колонки аудио преобразованы\")"
|
294 |
+
]
|
295 |
+
},
|
296 |
+
{
|
297 |
+
"cell_type": "code",
|
298 |
+
"execution_count": null,
|
299 |
+
"id": "89853d1c-a7db-498b-bd45-bc6e72b86b4e",
|
300 |
+
"metadata": {},
|
301 |
+
"outputs": [],
|
302 |
+
"source": []
|
303 |
+
}
|
304 |
+
],
|
305 |
+
"metadata": {
|
306 |
+
"environment": {
|
307 |
+
"kernel": "conda-base-py",
|
308 |
+
"name": "workbench-notebooks.m126",
|
309 |
+
"type": "gcloud",
|
310 |
+
"uri": "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/workbench-notebooks:m126"
|
311 |
+
},
|
312 |
+
"kernelspec": {
|
313 |
+
"display_name": "Python 3 (ipykernel) (Local)",
|
314 |
+
"language": "python",
|
315 |
+
"name": "conda-base-py"
|
316 |
+
},
|
317 |
+
"language_info": {
|
318 |
+
"codemirror_mode": {
|
319 |
+
"name": "ipython",
|
320 |
+
"version": 3
|
321 |
+
},
|
322 |
+
"file_extension": ".py",
|
323 |
+
"mimetype": "text/x-python",
|
324 |
+
"name": "python",
|
325 |
+
"nbconvert_exporter": "python",
|
326 |
+
"pygments_lexer": "ipython3",
|
327 |
+
"version": "3.10.15"
|
328 |
+
}
|
329 |
+
},
|
330 |
+
"nbformat": 4,
|
331 |
+
"nbformat_minor": 5
|
332 |
+
}
|
filtered_test_dataset.tsv
ADDED
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Unnamed: 0 № Date Francaise sentence path File 2 File 3 Consultant (S= Sualio, Y=Yacou_what'sup Unnamed: 13 Unnamed: 14 Unnamed: 15 sentence_normalized audio
|
2 |
+
1099 1832.0 2020-10-10 La poule va l a pique Mɛnɛnɛ be wo pa ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1832_10012020.wav S Mɛnɛnɛ be wo pa Mɛnɛnɛ be wo pa Mɛnɛnɛ be wo pa gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1832_10012020.wav
|
3 |
+
948 1632.0 2020-01-08 Vous n'avez pas ka̋ nù pálá ɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1632untitled.wav S ka nu pala ɔ ka nu pala ɔ ka nu pala ɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1632untitled.wav
|
4 |
+
49 111.0 2019-12-26 Je vois 1 chien ŋ̋ gɛ̋ŋ̋ dó yé ngn/clips/Audio_Ngen_2019_2020_2021/26122019/111.wav S ŋ gɛŋ do ye ŋ gɛŋ do ye ŋ gɛŋ do ye gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/26122019/111.wav
|
5 |
+
706 1249.0 2020-01-04 10 bù ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1249untitled.wav S bu bu bu gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1249untitled.wav
|
6 |
+
342 800.0 2019-12-30 riz màló ngn/clips/Audio_Ngen_2019_2020_2021/30122019/800untitled.wav S malo malo malo gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/30122019/800untitled.wav
|
7 |
+
823 1468.0 2020-01-06 poitrine dìŋ gɔ́ ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1468untitled.wav S diŋ gɔ diŋ gɔ diŋ gɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1468untitled.wav
|
8 |
+
306 747.0 2019-12-31 9 ve̋lì ti̋zì ngn/clips/Audio_Ngen_2019_2020_2021/31122019/747untitled.wav S veli tizi veli tizi veli tizi gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/31122019/747untitled.wav
|
9 |
+
700 1243.0 2020-01-04 10 gɔ̋ bú ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1243untitled.wav S gɔ bu gɔ bu gɔ bu gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1243untitled.wav
|
10 |
+
629 1157.0 2020-01-03 Vous envoyez Ka nu ba gba ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1157untitled.wav S Ka nu ba gba Ka nu ba gba Ka nu ba gba gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1157untitled.wav
|
11 |
+
76 151.0 2019-12-26 cendres yépé ngn/clips/Audio_Ngen_2019_2020_2021/26122019/151.wav S yepe yepe yepe gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/26122019/151.wav
|
12 |
+
247 662.0 2019-12-30 Leurs wȍ yȍrȍ pàlàŋ ngn/clips/Audio_Ngen_2019_2020_2021/30122019/662untitled.wav S wo yoro palaŋ wo yoro palaŋ wo yoro palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/30122019/662untitled.wav
|
13 |
+
649 1178.0 2020-01-03 Maison de femme Lìŋ̀ pàlà ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1178untitled.wav S Liŋ pala Liŋ pala Liŋ pala gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1178untitled.wav
|
14 |
+
429 947.0 2020-01-02 Ils vont Wò nűma̋ yȉ gbɛ̀ ngn/clips/Audio_Ngen_2019_2020_2021/02012020/947_02012020.wav S Wo numa yi gbɛ Wo numa yi gbɛ Wo numa yi gbɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/947_02012020.wav
|
15 |
+
78 156.0 2019-12-26 trou yɛ̏rɛ̏ ngn/clips/Audio_Ngen_2019_2020_2021/26122019/156.wav S yɛrɛ yɛrɛ yɛrɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/26122019/156.wav
|
16 |
+
626 1154.0 2020-01-03 T'envoye A nu na ba gbagbɛ ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1154untitled.wav S A nu na ba gbagbɛ A nu na ba gbagbɛ A nu na ba gbagbɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1154untitled.wav
|
17 |
+
1074 1804.0 2020-01-09 mensonge kḭ́ná̰ ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1804untitled.wav S kina kina kina gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1804untitled.wav
|
18 |
+
1077 1807.0 2020-01-09 vous kà ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1807untitled.wav S ka ka ka gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1807untitled.wav
|
19 |
+
192 391.0 2019-12-28 Je vois 7 marmites ŋ̋ yőrő síénú yè ngn/clips/Audio_Ngen_2019_2020_2021/26122019/391untitled.wav S ŋ yoro sienu ye ŋ yoro sienu ye ŋ yoro sienu ye gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/26122019/391untitled.wav
|
20 |
+
350 809.0 2019-12-31 9 branche gùŋ ti̋zī ngn/clips/Audio_Ngen_2019_2020_2021/31122019/809untitled.wav S guŋ tizi guŋ tizi guŋ tizi gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/31122019/809untitled.wav
|
21 |
+
596 1124.0 2020-01-03 Nous baillons bè lɛ́wóbòlà ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1124untitled.wav S be lɛwobola be lɛwobola be lɛwobola gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1124untitled.wav
|
22 |
+
873 1545.0 2020-01-07 Tu nous chatoille Ye̋ bá nɛ̀kéŋ́ lȁ ngn/clips/Audio_Ngen_2019_2020_2021/07012020/1545untitled.wav S Ye ba nɛkeŋ la Ye ba nɛkeŋ la Ye ba nɛkeŋ la gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/07012020/1545untitled.wav
|
23 |
+
817 1462.0 2020-01-06 2 Poitrine dìŋ gɔ́ pàlàŋ ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1462untitled.wav S diŋ gɔ palaŋ diŋ gɔ palaŋ diŋ gɔ palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1462untitled.wav
|
24 |
+
816 1461.0 2020-01-06 2 Marmite da yoro palaŋ ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1461untitled.wav S da yoro palaŋ da yoro palaŋ da yoro palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1461untitled.wav
|
25 |
+
1121 1855.0 2020-10-10 Ils ne Wò bà wó lé pé ɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1855_10012020.wav S Wo ba wo le pe ɔ Wo ba wo le pe ɔ Wo ba wo le pe ɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1855_10012020.wav
|
26 |
+
410 885.0 2020-01-02 1 âne súfále̋ dó ngn/clips/Audio_Ngen_2019_2020_2021/02012020/885_02012020.wav S sufale do sufale do sufale do gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/885_02012020.wav
|
27 |
+
277 702.0 2019-12-31 Nos bá tɛ̋ sìènȕ ngn/clips/Audio_Ngen_2019_2020_2021/31122019/702untitled.wav S ba tɛ sienu ba tɛ sienu ba tɛ sienu gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/31122019/702untitled.wav
|
28 |
+
792 1434.0 2020-01-06 Poitrine dà gɔ́ ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1434untitled.wav S da gɔ da gɔ da gɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1434untitled.wav
|
29 |
+
1085 1816.0 2020-01-09 bon gíɛ̀ ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1816untitled.wav S giɛ giɛ giɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1816untitled.wav
|
30 |
+
949 1633.0 2020-01-08 Ils n'ont pas wa̋ nù pálá ɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1633untitled.wav S wa nu pala ɔ wa nu pala ɔ wa nu pala ɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1633untitled.wav
|
31 |
+
51 113.0 2019-12-26 Je vois 5 chiens ŋ̋ gɛ́ŋ́ sɔ̋ŋ̋ yé ngn/clips/Audio_Ngen_2019_2020_2021/26122019/113.wav S ŋ gɛŋ sɔŋ ye ŋ gɛŋ sɔŋ ye ŋ gɛŋ sɔŋ ye gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/26122019/113.wav
|
32 |
+
196 396.0 2019-12-28 Je vois 9 bouteils de alcool ̀ŋ̋ yò ꜝti̋zì yè ngn/clips/Audio_Ngen_2019_2020_2021/26122019/396untitled.wav S ŋ yo ꜝtizi ye ŋ yo ꜝtizi ye ŋ yo ꜝtizi ye gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/26122019/396untitled.wav
|
33 |
+
370 837.0 2020-01-02 veine bàní ngn/clips/Audio_Ngen_2019_2020_2021/02012020/837_02012020.wav S bani bani bani gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/837_02012020.wav
|
34 |
+
70 144.0 2019-12-26 1 chat jāŋúmá dó ngn/clips/Audio_Ngen_2019_2020_2021/26122019/144.wav S jaŋuma do jaŋuma do jaŋuma do gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/26122019/144.wav
|
35 |
+
265 680.0 2019-12-30 Leur wò gɔ̋ ngn/clips/Audio_Ngen_2019_2020_2021/30122019/680untitled.wav S wo gɔ wo gɔ wo gɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/30122019/680untitled.wav
|
36 |
+
1152 1886.0 2020-10-10 Je ne vais pas seche ma wɛ lɛ pia ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1886_10012020.wav S ma wɛ lɛ pia ma wɛ lɛ pia ma wɛ lɛ pia gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1886_10012020.wav
|
37 |
+
1067 1797.0 2020-01-09 embrasser chènwódȁlè ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1797untitled.wav S chenwodale chenwodale chenwodale gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1797untitled.wav
|
38 |
+
1105 1838.0 2020-10-10 La poule ne va pas les pique Mɛnɛnɛ ba wo pa a ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1838_10012020.wav S Mɛnɛnɛ ba wo pa a Mɛnɛnɛ ba wo pa a Mɛnɛnɛ ba wo pa a gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1838_10012020.wav
|
39 |
+
707 1250.0 2020-01-04 10 bù ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1250untitled.wav S bu bu bu gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1250untitled.wav
|
40 |
+
374 841.0 2020-01-02 écureuil baŋbo+G829:G841 ngn/clips/Audio_Ngen_2019_2020_2021/02012020/841_02012020.wav S baŋbo+G829:G841 baŋbo+G829:G841 baŋbo+G829:G841 gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/841_02012020.wav
|
41 |
+
710 1263.0 2020-01-04 20 gő bùá pàlàŋ ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1263untitled.wav S go bua palaŋ go bua palaŋ go bua palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1263untitled.wav
|
42 |
+
184 383.0 2019-12-28 Je vois 7 termites ŋ̋ dő síénú yè ngn/clips/Audio_Ngen_2019_2020_2021/26122019/383untitled.wav S ŋ do sienu ye ŋ do sienu ye ŋ do sienu ye gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/26122019/383untitled.wav
|
43 |
+
423 941.0 2020-01-02 Ils Wő yì lȁ ngn/clips/Audio_Ngen_2019_2020_2021/02012020/941_02012020.wav S Wo yi la Wo yi la Wo yi la gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/941_02012020.wav
|
44 |
+
208 445.0 2019-12-28 Il y a 1 cobra dò dó be̋ nɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/28122019/445untitled.wav S do do be nɔ do do be nɔ do do be nɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/28122019/445untitled.wav
|
45 |
+
275 700.0 2019-12-31 Tes í tɛ̋ sìènȕ ngn/clips/Audio_Ngen_2019_2020_2021/31122019/700untitled.wav S i tɛ sienu i tɛ sienu i tɛ sienu gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/31122019/700untitled.wav
|
46 |
+
198 398.0 2019-12-28 Je vois 2 cobra ŋ̋ dő pàlàŋ yè ngn/clips/Audio_Ngen_2019_2020_2021/26122019/398untitled.wav S ŋ do palaŋ ye ŋ do palaŋ ye ŋ do palaŋ ye gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/26122019/398untitled.wav
|
47 |
+
924 1607.0 2020-01-08 Vous ne boiviez pas Kɛ ka yi mi ɔ ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1607untitled.wav S Kɛ ka yi mi ɔ Kɛ ka yi mi ɔ Kɛ ka yi mi ɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1607untitled.wav
|
48 |
+
912 1595.0 2020-01-08 Vous n'avez pas Ka yi mi ɔ ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1595untitled.wav S Ka yi mi ɔ Ka yi mi ɔ Ka yi mi ɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1595untitled.wav
|
49 |
+
918 1601.0 2020-01-08 Vous boiviez Kɛ ka yi mi ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1601untitled.wav S Kɛ ka yi mi Kɛ ka yi mi Kɛ ka yi mi gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1601untitled.wav
|
50 |
+
239 654.0 2019-12-30 Notre bá yórȍ или bá yòrȍ ngn/clips/Audio_Ngen_2019_2020_2021/30122019/654untitled.wav S ba yoro или ba yoro ba yoro или ba yoro ba yoro или ba yoro gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/30122019/654untitled.wav
|
51 |
+
432 950.0 2020-01-02 Il a A̋ yȉ ngn/clips/Audio_Ngen_2019_2020_2021/02012020/950_02012020.wav S A yi A yi A yi gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/950_02012020.wav
|
52 |
+
859 1527.0 2020-01-07 Swale s'est habitue à la tabac Tawalɛ e Sualuo ɲɔ̀dɔ̀lɛ̀ ngn/clips/Audio_Ngen_2019_2020_2021/07012020/1526untitled.wav S Tawalɛ e Sualuo ɲɔdɔlɛ Tawalɛ e Sualuo ɲɔdɔlɛ Tawalɛ e Sualuo ɲɔdɔlɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/07012020/1526untitled.wav
|
53 |
+
594 1122.0 2020-01-03 Tu baille yé lɛ́wóbòlà ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1122untitled.wav S ye lɛwobola ye lɛwobola ye lɛwobola gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1122untitled.wav
|
54 |
+
631 1159.0 2020-01-03 J'envoye Nu na ka gbagbɛ ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1159untitled.wav S Nu na ka gbagbɛ Nu na ka gbagbɛ Nu na ka gbagbɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1159untitled.wav
|
55 |
+
620 1148.0 2020-01-03 T'envoye A nu na azoŋ ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1148untitled.wav S A nu na azoŋ A nu na azoŋ A nu na azoŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1148untitled.wav
|
56 |
+
745 1326.0 ȁkɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1326untitled.wav S akɔ akɔ akɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1326untitled.wav
|
57 |
+
1032 1753.0 2020-01-09 ici je habite ici nɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1753untitled.wav S nɔ nɔ nɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1753untitled.wav
|
58 |
+
990 1682.0 2020-01-08 Ils ne tuent (en train de) Wò ba̋ dő nȍ zɛ̀ ɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1682untitled.wav S Wo ba do no zɛ ɔ Wo ba do no zɛ ɔ Wo ba do no zɛ ɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1682untitled.wav
|
59 |
+
605 1133.0 2020-01-03 Vous allez kà lő màlà ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1133untitled.wav S ka lo mala ka lo mala ka lo mala gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1133untitled.wav
|
60 |
+
428 946.0 2020-01-02 Vous allez Kà nűma̋ yȉ gbɛ̀ ngn/clips/Audio_Ngen_2019_2020_2021/02012020/946_02012020.wav S Ka numa yi gbɛ Ka numa yi gbɛ Ka numa yi gbɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/946_02012020.wav
|
61 |
+
969 1654.0 2020-01-08 Tu ne tues Ì bá dő zɛ̀ ɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1654untitled.wav S I ba do zɛ ɔ I ba do zɛ ɔ I ba do zɛ ɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1654untitled.wav
|
62 |
+
693 1236.0 2020-01-04 8 pálá sɔ́ŋ̀bȁ ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1236untitled.wav S pala sɔŋba pala sɔŋba pala sɔŋba gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1236untitled.wav
|
63 |
+
371 838.0 2020-01-02 aisselle bànnṵ̀lá̰ ngn/clips/Audio_Ngen_2019_2020_2021/02012020/838_02012020.wav S bannula bannula bannula gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/838_02012020.wav
|
64 |
+
367 833.0 2020-01-02 toucher ba̋lɛ̋àmàn ngn/clips/Audio_Ngen_2019_2020_2021/02012020/833_02012020.wav S balɛaman balɛaman balɛaman gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/833_02012020.wav
|
65 |
+
588 1116.0 2020-01-03 repondre lɛ́sḭ́lɛ́ ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1116untitled.wav S lɛsilɛ lɛsilɛ lɛsilɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1116untitled.wav
|
66 |
+
353 813.0 2019-12-31 2 cameleon dódóhőlɛ̋ páláŋ ngn/clips/Audio_Ngen_2019_2020_2021/31122019/813untitled.wav S dodoholɛ palaŋ dodoholɛ palaŋ dodoholɛ palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/31122019/813untitled.wav
|
67 |
+
778 1417.0 2020-01-05 gauyave agoya ngn/clips/Audio_Ngen_2019_2020_2021/05012020/1417untitled.wav S agoya agoya agoya gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/05012020/1417untitled.wav
|
68 |
+
425 943.0 2020-01-02 Tu vais Ì nűma̋ yȉ gbɛ̀ ngn/clips/Audio_Ngen_2019_2020_2021/02012020/943_02012020.wav S I numa yi gbɛ I numa yi gbɛ I numa yi gbɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/943_02012020.wav
|
69 |
+
1212 1965.0 2020-01-12 sasesoir trɔ́kálálɛ́ ngn/clips/Audio_Ngen_2019_2020_2021/12012020/1965untitled.wav S trɔkalalɛ trɔkalalɛ trɔkalalɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/12012020/1965untitled.wav
|
70 |
+
624 1152.0 2020-01-03 Ils envoyent Wa nu na azoŋ ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1152untitled.wav S Wa nu na azoŋ Wa nu na azoŋ Wa nu na azoŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1152untitled.wav
|
71 |
+
1163 1914.0 2020-01-12 2 nouriture pɔbelelɛ palaŋ ngn/clips/Audio_Ngen_2019_2020_2021/12012020/1914untitled.wav S pɔbelelɛ palaŋ pɔbelelɛ palaŋ pɔbelelɛ palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/12012020/1914untitled.wav
|
72 |
+
855 1522.0 2020-01-07 1 coq mɛ̀lɛ̀sa̋ dó ngn/clips/Audio_Ngen_2019_2020_2021/07012020/1521untitled.wav S mɛlɛsa do mɛlɛsa do mɛlɛsa do gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/07012020/1521untitled.wav
|
73 |
+
602 1130.0 2020-01-03 Tu vais ì lő màlà ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1130untitled.wav S i lo mala i lo mala i lo mala gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1130untitled.wav
|
74 |
+
115 227.0 2019-12-27 jambe gba̋ ngn/clips/Audio_Ngen_2019_2020_2021/27122019/227untitled.wav S gba gba gba gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/27122019/227untitled.wav
|
75 |
+
163 349.0 2019-12-28 1 manioc gbe̋ne̋ dó ngn/clips/Audio_Ngen_2019_2020_2021/28122019/349untitled.wav S gbene do gbene do gbene do gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/28122019/349untitled.wav
|
76 |
+
261 676.0 2019-12-30 Ton í gɔ̋ ngn/clips/Audio_Ngen_2019_2020_2021/30122019/676untitled.wav S i gɔ i gɔ i gɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/30122019/676untitled.wav
|
77 |
+
237 652.0 2019-12-30 Ton í yòrȍ ngn/clips/Audio_Ngen_2019_2020_2021/30122019/652untitled.wav S i yoro i yoro i yoro gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/30122019/652untitled.wav
|
78 |
+
814 1457.0 2020-01-06 2 Marmite gu yoro palaŋ ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1457untitled.wav S gu yoro palaŋ gu yoro palaŋ gu yoro palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1457untitled.wav
|
79 |
+
575 1103.0 2020-01-03 feille láŋ́ ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1103untitled.wav S laŋ laŋ laŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1103untitled.wav
|
80 |
+
358 819.0 2019-12-31 2 poumons pùlù páláŋ ngn/clips/Audio_Ngen_2019_2020_2021/31122019/819untitled.wav S pulu palaŋ pulu palaŋ pulu palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/31122019/819untitled.wav
|
81 |
+
233 648.0 2019-12-30 Nos bá yó pàlàŋ ngn/clips/Audio_Ngen_2019_2020_2021/30122019/648untitled.wav S ba yo palaŋ ba yo palaŋ ba yo palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/30122019/648untitled.wav
|
82 |
+
597 1125.0 2020-01-03 Vous baillez kè lɛ́wóbòlà ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1125untitled.wav S ke lɛwobola ke lɛwobola ke lɛwobola gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1125untitled.wav
|
83 |
+
435 953.0 2020-01-02 Ils ont Wa̋ yȉ ngn/clips/Audio_Ngen_2019_2020_2021/02012020/953_02012020.wav S Wa yi Wa yi Wa yi gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/953_02012020.wav
|
84 |
+
1006 1707.0 2020-01-08 Je vais (ŋo) nő málá pɔ̀ɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1707untitled.wav S (ŋo) no mala pɔɔ (ŋo) no mala pɔɔ (ŋo) no mala pɔɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1707untitled.wav
|
85 |
+
1140 1874.0 2020-10-10 Ils Wó la̋ ?á pèŋ̀ gbɛ̏ ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1874_10012020.wav S Wo la ?a peŋ gbɛ Wo la ?a peŋ gbɛ Wo la ?a peŋ gbɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1874_10012020.wav
|
86 |
+
1072 1802.0 2020-01-09 déchirer chi̋chi̋nɛ̀ ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1802untitled.wav S chichinɛ chichinɛ chichinɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1802untitled.wav
|
87 |
+
785 1427.0 2020-01-06 Alcool gɔ̀ŋ yȍ ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1427untitled.wav S gɔŋ yo gɔŋ yo gɔŋ yo gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1427untitled.wav
|
88 |
+
1240 1997.0 2022-01-30 L'anne sepasse j'ai parle jɛ́ gbi̋ŋ̋ zírȉ ŋ́ wélé pè ngn/clips/Audio_Ngen_2019_2020_2021/12012020/1997untitled.wav C:/Users/Admin/Desktop/Ngen/Audio+previous_data/Audio_Ngen_2019_2020_2021/2022/30012022/220130-033.wav S jɛ gbiŋ ziri ŋ wele pe jɛ gbiŋ ziri ŋ wele pe jɛ gbiŋ ziri ŋ wele pe gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/12012020/1997untitled.wav
|
89 |
+
664 1201.0 2020-01-04 3 fɛ̏ ȁkɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1201untitled.wav S fɛ akɔ fɛ akɔ fɛ akɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1201untitled.wav
|
90 |
+
579 1107.0 2020-01-03 mussulman làpúlúsílé ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1107untitled.wav S lapulusile lapulusile lapulusile gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1107untitled.wav
|
91 |
+
722 1279.0 2020-01-04 100 gȁlȁ hàdò ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1279untitled.wav S gala hado gala hado gala hado gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1279untitled.wav
|
92 |
+
1022 1735.0 2020-01-08 Il vait é lő ma̋ pɔ̀ɔ́ ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1735untitled.wav S e lo ma pɔɔ e lo ma pɔɔ e lo ma pɔɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1735untitled.wav
|
93 |
+
1148 1882.0 2020-10-10 Nous sechons bȅ wɛ̋ lɛ̏ píà là ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1882_10012020.wav S be wɛ lɛ pia la be wɛ lɛ pia la be wɛ lɛ pia la gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1882_10012020.wav
|
94 |
+
493 1011.0 2020-01-02 Tu ne ì bá yɔ́ mḭ̏ wɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/02012020/1011_02012020.wav S i ba yɔ mi wɔ i ba yɔ mi wɔ i ba yɔ mi wɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/1011_02012020.wav
|
95 |
+
1004 1705.0 2020-01-08 Vous ne tuiriez pas Ká bá dó lɛ̏ zɛ̀ ɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1705untitled.wav S Ka ba do lɛ zɛ ɔ Ka ba do lɛ zɛ ɔ Ka ba do lɛ zɛ ɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1705untitled.wav
|
96 |
+
168 357.0 2019-12-28 probleme we̋le̋ ngn/clips/Audio_Ngen_2019_2020_2021/28122019/357untitled.wav S wele wele wele gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/28122019/357untitled.wav
|
97 |
+
820 1465.0 2020-01-06 2 Marmite dìŋ yòrò pàlàŋ ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1465untitled.wav S diŋ yoro palaŋ diŋ yoro palaŋ diŋ yoro palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1465untitled.wav
|
98 |
+
614 1142.0 2020-01-03 T'envoye A nu na azoŋ ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1142untitled.wav S A nu na azoŋ A nu na azoŋ A nu na azoŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1142untitled.wav
|
99 |
+
1095 1828.0 2020-10-10 Tu Ye butɛlilɛ pala ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1828_10012020.wav S Ye butɛlilɛ pala Ye butɛlilɛ pala Ye butɛlilɛ pala gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1828_10012020.wav
|
100 |
+
622 1150.0 2020-01-03 Nous envoyons Ba nu na azoŋ ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1150untitled.wav S Ba nu na azoŋ Ba nu na azoŋ Ba nu na azoŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1150untitled.wav
|
101 |
+
968 1653.0 2020-01-08 Je ne tue Má dő zɛ̀ ɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1653untitled.wav S Ma do zɛ ɔ Ma do zɛ ɔ Ma do zɛ ɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1653untitled.wav
|
102 |
+
938 1621.0 2020-01-08 J'ai ŋ̋ nù pálá ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1621untitled.wav S ŋ nu pala ŋ nu pala ŋ nu pala gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1621untitled.wav
|
103 |
+
453 971.0 2020-01-02 Ils n'ont pas Wá yí ɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/02012020/971_02012020.wav S Wa yi ɔ Wa yi ɔ Wa yi ɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/971_02012020.wav
|
104 |
+
939 1622.0 2020-01-08 Tu as ya̋ nù pálá ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1622untitled.wav S ya nu pala ya nu pala ya nu pala gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1622untitled.wav
|
105 |
+
297 736.0 2019-12-31 1 gbɔ̏n dó ngn/clips/Audio_Ngen_2019_2020_2021/30122019/736untitled.wav S gbɔn do gbɔn do gbɔn do gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/30122019/736untitled.wav
|
106 |
+
1053 1779.0 2020-01-09 lait (du moutons etc) nɔ́nɔ́ ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1779untitled.wav S nɔnɔ nɔnɔ nɔnɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1779untitled.wav
|
107 |
+
107 204.0 2019-12-27 7 poitrine gɔ̋ sìènȕ ngn/clips/Audio_Ngen_2019_2020_2021/27122019/204untitled.wav S gɔ sienu gɔ sienu gɔ sienu gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/27122019/204untitled.wav
|
108 |
+
705 1248.0 2020-01-04 10 bú ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1248untitled.wav S bu bu bu gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1248untitled.wav
|
109 |
+
482 1000.0 2020-01-02 Il va è lőma̋ yɔ̏ mḭ́ gbɛ̏ ngn/clips/Audio_Ngen_2019_2020_2021/02012020/1000_02012020.wav S e loma yɔ mi gbɛ e loma yɔ mi gbɛ e loma yɔ mi gbɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/1000_02012020.wav
|
110 |
+
1051 1777.0 2020-01-09 2 pluie ɲɔ̏ pàlàŋ ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1777untitled.wav S ɲɔ palaŋ ɲɔ palaŋ ɲɔ palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1777untitled.wav
|
111 |
+
915 1598.0 2020-01-08 Tu boivais Kɛ i yi mi ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1598untitled.wav S Kɛ i yi mi Kɛ i yi mi Kɛ i yi mi gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1598untitled.wav
|
112 |
+
382 849.0 2020-01-02 unir bɛŋɛ ngn/clips/Audio_Ngen_2019_2020_2021/02012020/849_02012020.wav S bɛŋɛ bɛŋɛ bɛŋɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/849_02012020.wav
|
113 |
+
573 1101.0 2020-01-03 diminuer lábúlɛ́ ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1101untitled.wav S labulɛ labulɛ labulɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1101untitled.wav
|
114 |
+
361 824.0 2020-01-02 panier go ngn/clips/Audio_Ngen_2019_2020_2021/31122019/824untitled.wav S go go go gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/31122019/824untitled.wav
|
115 |
+
351 810.0 2019-12-31 fūfūū (léger) fófó ngn/clips/Audio_Ngen_2019_2020_2021/31122019/810untitled.wav S fofo fofo fofo gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/31122019/810untitled.wav
|
116 |
+
1124 1858.0 2020-10-10 Tu as yà wé lé pè ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1858_10012020.wav S ya we le pe ya we le pe ya we le pe gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1858_10012020.wav
|
117 |
+
365 828.0 2020-01-02 2 cochons gbőŋ̋ palaŋ ngn/clips/Audio_Ngen_2019_2020_2021/02012020/828_02012020.wav S gboŋ palaŋ gboŋ palaŋ gboŋ palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/828_02012020.wav
|
118 |
+
953 1637.0 2020-01-08 Nous allons Bá nű mà pálá ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1637untitled.wav S Ba nu ma pala Ba nu ma pala Ba nu ma pala gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1637untitled.wav
|
119 |
+
346 805.0 2019-12-31 2 formi magnan zúrúŋ́ páláŋ ngn/clips/Audio_Ngen_2019_2020_2021/31122019/805untitled.wav S zuruŋ palaŋ zuruŋ palaŋ zuruŋ palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/31122019/805untitled.wav
|
120 |
+
111 214.0 2019-12-27 2 noix de kola gɔ́lí pàlàŋ ngn/clips/Audio_Ngen_2019_2020_2021/27122019/214untitled.wav S gɔli palaŋ gɔli palaŋ gɔli palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/27122019/214untitled.wav
|
121 |
+
691 1234.0 2020-01-04 8 dó sɔ́ŋ̀bȁ ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1234untitled.wav S do sɔŋba do sɔŋba do sɔŋba gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1234untitled.wav
|
122 |
+
289 714.0 2019-12-31 Nos bá dó sìènȕ ngn/clips/Audio_Ngen_2019_2020_2021/30122019/714untitled.wav S ba do sienu ba do sienu ba do sienu gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/30122019/714untitled.wav
|
123 |
+
882 1556.0 2020-01-07 Il ne É bá nűmà pálà ngn/clips/Audio_Ngen_2019_2020_2021/07012020/1556untitled.wav S E ba numa pala E ba numa pala E ba numa pala gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/07012020/1556untitled.wav
|
124 |
+
578 1106.0 2020-01-03 ciel làpúlú ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1106untitled.wav S lapulu lapulu lapulu gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1106untitled.wav
|
125 |
+
175 367.0 2019-12-28 9 lions jàra̋ ti̋zì ngn/clips/Audio_Ngen_2019_2020_2021/28122019/367untitled.wav S jara tizi jara tizi jara tizi gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/28122019/367untitled.wav
|
126 |
+
240 655.0 2019-12-30 Votre ká yórȍ ngn/clips/Audio_Ngen_2019_2020_2021/30122019/655untitled.wav S ka yoro ka yoro ka yoro gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/30122019/655untitled.wav
|
127 |
+
828 1475.0 2020-01-06 Le termite est dó lɛ tɛ̰ɛ̰ ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1475untitled.wav S do lɛ tɛɛ do lɛ tɛɛ do lɛ tɛɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1475untitled.wav
|
128 |
+
934 1617.0 2020-01-08 Il ne Kɛ e ba yi la a ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1617untitled.wav S Kɛ e ba yi la a Kɛ e ba yi la a Kɛ e ba yi la a gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1617untitled.wav
|
129 |
+
796 1438.0 2020-01-06 Poitrine dìŋ gɔ́ ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1438untitled.wav S diŋ gɔ diŋ gɔ diŋ gɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1438untitled.wav
|
130 |
+
1213 1966.0 2020-01-12 en bas trɔ́má ngn/clips/Audio_Ngen_2019_2020_2021/12012020/1966untitled.wav S trɔma trɔma trɔma gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/12012020/1966untitled.wav
|
131 |
+
1101 1834.0 2020-10-10 La poule l'a piquer Mɛnɛnɛ a pa ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1834_10012020.wav S Mɛnɛnɛ a pa Mɛnɛnɛ a pa Mɛnɛnɛ a pa gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1834_10012020.wav
|
132 |
+
1098 1831.0 2020-10-10 La poule les a piquer Mɛnɛnɛ a wo pa ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1831_10012020.wav S Mɛnɛnɛ a wo pa Mɛnɛnɛ a wo pa Mɛnɛnɛ a wo pa gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1831_10012020.wav
|
133 |
+
670 1213.0 2020-01-04 5 gɔ̋ sɔ̋ŋ̋ ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1213untitled.wav S gɔ sɔŋ gɔ sɔŋ gɔ sɔŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1213untitled.wav
|
134 |
+
679 1222.0 2020-01-04 6 dő sɔ̋dù ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1222untitled.wav S do sɔdu do sɔdu do sɔdu gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1222untitled.wav
|
135 |
+
1211 1963.0 2020-01-12 igname trɛ́ ngn/clips/Audio_Ngen_2019_2020_2021/12012020/1963untitled.wav S trɛ trɛ trɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/12012020/1963untitled.wav
|
136 |
+
271 690.0 2019-12-31 Nos ka̋ yő sienȕ ngn/clips/Audio_Ngen_2019_2020_2021/31122019/690untitled.wav S ka yo sienu ka yo sienu ka yo sienu gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/31122019/690untitled.wav
|
137 |
+
1115 1849.0 2020-10-10 Tu as Ya ye sa ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1849_10012020.wav S Ya ye sa Ya ye sa Ya ye sa gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1849_10012020.wav
|
138 |
+
1197 1949.0 2020-01-12 poivre séwé ngn/clips/Audio_Ngen_2019_2020_2021/12012020/1949untitled.wav S sewe sewe sewe gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/12012020/1949untitled.wav
|
139 |
+
56 118.0 2019-12-26 Je vois 5 alcool ŋ̋ yó sɔ̋ŋ̋ yè ngn/clips/Audio_Ngen_2019_2020_2021/26122019/118.wav S ŋ yo sɔŋ ye ŋ yo sɔŋ ye ŋ yo sɔŋ ye gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/26122019/118.wav
|
140 |
+
886 1561.0 2020-01-08 Je mé yí mì là ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1561untitled.wav S me yi mi la me yi mi la me yi mi la gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/08012020/1561untitled.wav
|
141 |
+
621 1149.0 2020-01-03 Il envoye A nu na azoŋ ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1149untitled.wav S A nu na azoŋ A nu na azoŋ A nu na azoŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1149untitled.wav
|
142 |
+
1300 2066.0 2020-01-14 Je vois une petite femme (о росте) ŋ dro(personne petit) liŋ siŋ do ye ngn/clips/Audio_Ngen_2019_2020_2021/14012020/2066untitled.wav S ŋ dro(personne petit) liŋ siŋ do ye ŋ dro(personne petit) liŋ siŋ do ye ŋ dro(personne petit) liŋ siŋ do ye gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/14012020/2066untitled.wav
|
143 |
+
695 1238.0 2020-01-04 8 wɛ̀ sɔ̀ŋ̏bȁ ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1238untitled.wav S wɛ sɔŋba wɛ sɔŋba wɛ sɔŋba gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1238untitled.wav
|
144 |
+
1292 2058.0 2020-01-14 Je vois 5 gazelle ŋ́ bɛ́ŋ̏ sɔ̀ŋ̀ yè или ŋ̋ bɛ́ŋ̏ sɔ̀ŋ̀ yè ngn/clips/Audio_Ngen_2019_2020_2021/14012020/2058untitled.wav S ŋ bɛŋ sɔŋ ye или ŋ bɛŋ sɔŋ ye ŋ bɛŋ sɔŋ ye или ŋ bɛŋ sɔŋ ye ŋ bɛŋ sɔŋ ye или ŋ bɛŋ sɔŋ ye gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/14012020/2058untitled.wav
|
145 |
+
590 1118.0 2020-01-03 offencer lɛ́wéwèlɛ̀ ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1118untitled.wav S lɛwewelɛ lɛwewelɛ lɛwewelɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1118untitled.wav
|
146 |
+
874 1546.0 2020-01-07 Tu les chatoille Yè wő nɛ̀kéŋ́ lȁ ngn/clips/Audio_Ngen_2019_2020_2021/07012020/1546untitled.wav S Ye wo nɛkeŋ la Ye wo nɛkeŋ la Ye wo nɛkeŋ la gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/07012020/1546untitled.wav
|
147 |
+
123 250.0 2019-12-27 7 miel gbɔ̰́ sìènȕ ngn/clips/Audio_Ngen_2019_2020_2021/27122019/250untitled.wav S gbɔ sienu gbɔ sienu gbɔ sienu gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/27122019/250untitled.wav
|
148 |
+
270 686.0 2019-12-31 Leurs wò gő pàlàŋ ngn/clips/Audio_Ngen_2019_2020_2021/31122019/686untitled.wav S wo go palaŋ wo go palaŋ wo go palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/31122019/686untitled.wav
|
149 |
+
309 754.0 2019-12-30 2 chanson lɛ̀lɛ̀ŋ pàlàŋ ngn/clips/Audio_Ngen_2019_2020_2021/30122019/754untitled.wav S lɛlɛŋ palaŋ lɛlɛŋ palaŋ lɛlɛŋ palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/30122019/754untitled.wav
|
150 |
+
884 1558.0 2020-01-07 Vous ne Ká bá nűmà pálà ngn/clips/Audio_Ngen_2019_2020_2021/07012020/1558untitled.wav S Ka ba numa pala Ka ba numa pala Ka ba numa pala gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/07012020/1558untitled.wav
|
151 |
+
1065 1795.0 2020-01-09 année chɛ̏ ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1795untitled.wav S chɛ chɛ chɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1795untitled.wav
|
152 |
+
591 1119.0 2020-01-03 s'etonner lɛ́wílɛ́ ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1119untitled.wav S lɛwilɛ lɛwilɛ lɛwilɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1119untitled.wav
|
153 |
+
427 945.0 2020-01-02 Nous allons Bà nűma̋ yȉ gbɛ̀ ngn/clips/Audio_Ngen_2019_2020_2021/02012020/945_02012020.wav S Ba numa yi gbɛ Ba numa yi gbɛ Ba numa yi gbɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/945_02012020.wav
|
154 |
+
650 1179.0 2020-01-03 Marmite de femme Lìŋ̀ yòrȍ ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1179untitled.wav S Liŋ yoro Liŋ yoro Liŋ yoro gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/03012020/1179untitled.wav
|
155 |
+
113 216.0 2019-12-27 9 noix de kola gɔ́lí ti̋zì ngn/clips/Audio_Ngen_2019_2020_2021/27122019/216untitled.wav S gɔli tizi gɔli tizi gɔli tizi gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/27122019/216untitled.wav
|
156 |
+
793 1435.0 2020-01-06 Alcool dà yȍ или dá yȍ ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1435untitled.wav S da yo или da yo da yo или da yo da yo или da yo gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/06122020/1435untitled.wav
|
157 |
+
174 366.0 2019-12-28 9 os wɛ̋lɛ̋ ti̋zì ngn/clips/Audio_Ngen_2019_2020_2021/28122019/366untitled.wav S wɛlɛ tizi wɛlɛ tizi wɛlɛ tizi gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/28122019/366untitled.wav
|
158 |
+
430 948.0 2020-01-02 J'ai ŋ̋ yȉ ngn/clips/Audio_Ngen_2019_2020_2021/02012020/948_02012020.wav S ŋ yi ŋ yi ŋ yi gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/948_02012020.wav
|
159 |
+
1112 1845.0 2020-10-10 Couvette est plein pɛ́ŋ́ nɛ́ palɛ ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1845_10012020.wav S pɛŋ nɛ palɛ pɛŋ nɛ palɛ pɛŋ nɛ palɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/10012020/1845_10012020.wav
|
160 |
+
1035 1756.0 2020-01-09 en haut núŋ̋ ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1756untitled.wav S nuŋ nuŋ nuŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/09012020/1756untitled.wav
|
161 |
+
1280 2038.0 2020-01-13 mentir kíná-yɛ̀lɛ̀ ngn/clips/Audio_Ngen_2019_2020_2021/13012020/2038untitled.wav S kina-yɛlɛ kina-yɛlɛ kina-yɛlɛ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/13012020/2038untitled.wav
|
162 |
+
31 75.0 2019-12-25 7 tortues zàpli̋ sìènȕ ngn/clips/Audio_Ngen_2019_2020_2021/25122019/029_25122019.wav S zapli sienu zapli sienu zapli sienu gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/25122019/029_25122019.wav
|
163 |
+
210 448.0 2019-12-28 Il y a 9 cobra dò ti̋zì be̋ nɔ̀ ngn/clips/Audio_Ngen_2019_2020_2021/28122019/448untitled.wav S do tizi be nɔ do tizi be nɔ do tizi be nɔ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/28122019/448untitled.wav
|
164 |
+
715 1269.0 2020-01-04 20 gàlà bùá pàlàŋ ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1269untitled.wav S gala bua palaŋ gala bua palaŋ gala bua palaŋ gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1269untitled.wav
|
165 |
+
354 814.0 2019-12-31 herbe pùpű ngn/clips/Audio_Ngen_2019_2020_2021/31122019/814untitled.wav S pupu pupu pupu gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/31122019/814untitled.wav
|
166 |
+
188 387.0 2020-01-02 Je vois 7 cadavre ŋ̋ gáé sìènȕ yè ngn/clips/Audio_Ngen_2019_2020_2021/26122019/387untitled.wav S ŋ gae sienu ye ŋ gae sienu ye ŋ gae sienu ye gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/26122019/387untitled.wav
|
167 |
+
363 826.0 2020-01-02 1 route tɛ̋ dó ngn/clips/Audio_Ngen_2019_2020_2021/02012020/826_02012020.wav S tɛ do tɛ do tɛ do gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/02012020/826_02012020.wav
|
168 |
+
668 1211.0 2020-01-04 4 Реализация ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1211untitled.wav S Реализация Реализация Реализация gs://ngen_model_fine_tuned/ngn/clips/Audio_Ngen_2019_2020_2021/04012020/1211untitled.wav
|
filtered_train_dataset.tsv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3858906296
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0357c29f6b59309bddd22b7893835b5af978ca5a7075a63281e81d95103265a7
|
3 |
size 3858906296
|
runs/Dec10_17-36-35_instance-20241206-091824/events.out.tfevents.1733852230.instance-20241206-091824
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:38b463fbdda649426fb1469c61f2344ee54408247645ebe28b422094ce5a5fca
|
3 |
+
size 6473
|
runs/Dec10_17-40-16_instance-20241206-091824/events.out.tfevents.1733852448.instance-20241206-091824
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ffd2e88eadaf842d353af6af860e62a0822fde31ed4366fb8bf02d4398d142c
|
3 |
+
size 6473
|
runs/Dec11_07-51-40_instance-20241206-091824/events.out.tfevents.1733903600.instance-20241206-091824
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0e760a2678c9e6bb159861cdc1b31b9e9cfe8aeb7c36735e2fb7e03dedc7bf5
|
3 |
+
size 8475
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5368
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e58c540dcc8c04048209e1f06263dc4526ae53030cc4f3e0a38a278e3ed53001
|
3 |
size 5368
|