ziyjiang committed on
Commit
c1f26b8
·
verified ·
1 Parent(s): 99d9060

Upload config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.json +504 -0
config.json ADDED
@@ -0,0 +1,504 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "TextEncoders": {
3
+ "bert": {
4
+ "config": "configs/config_bert.json",
5
+ "d_model": 768,
6
+ "fusion_layer": 9,
7
+ "name": "bert_base",
8
+ "pretrained": "bert-base-uncased"
9
+ },
10
+ "bert_large": {
11
+ "config": "configs/config_bert_large.json",
12
+ "d_model": 1024,
13
+ "fusion_layer": 19,
14
+ "name": "bert_large",
15
+ "pretrained": "bert-large-uncased"
16
+ },
17
+ "med_bert": {
18
+ "config": "configs/med_config.json",
19
+ "d_model": 768,
20
+ "name": "med_bert_base",
21
+ "pretrained": "bert-base-uncased"
22
+ },
23
+ "med_bert_large": {
24
+ "config": "configs/med_large_config.json",
25
+ "d_model": 768,
26
+ "name": "med_bert_large",
27
+ "pretrained": "bert-base-uncased"
28
+ }
29
+ },
30
+ "VisionEncoders": {},
31
+ "architectures": [
32
+ "InternVideo2_Stage2"
33
+ ],
34
+ "auto_map": {
35
+ "AutoConfig": "modeling_internvideo2.InternVideo2_Stage2_Config",
36
+ "AutoModel": "modeling_internvideo2.InternVideo2_Stage2"
37
+ },
38
+ "auto_resume": true,
39
+ "available_corpus": {
40
+ "anet_ret_train": {
41
+ "anno_path": "your_path",
42
+ "data_root": "",
43
+ "is_paragraph_retrieval": true,
44
+ "max_txt_l": 150,
45
+ "media_type": "video"
46
+ },
47
+ "anet_ret_val": {
48
+ "anno_path": "your_path",
49
+ "data_root": "",
50
+ "is_paragraph_retrieval": true,
51
+ "max_txt_l": 150,
52
+ "media_type": "video"
53
+ },
54
+ "audiocaps_ret_test": {
55
+ "anno_path": "your_path",
56
+ "data_root": "",
57
+ "media_type": "audio"
58
+ },
59
+ "audiocaps_ret_train": {
60
+ "anno_path": "your_path",
61
+ "data_root": "",
62
+ "media_type": "audio"
63
+ },
64
+ "cc12m": {
65
+ "anno_path": "your_path",
66
+ "data_root": "",
67
+ "media_type": "image"
68
+ },
69
+ "cc3m": {
70
+ "anno_path": "your_path",
71
+ "data_root": "",
72
+ "media_type": "image"
73
+ },
74
+ "cc3m_debug": {
75
+ "anno_path": "your_path",
76
+ "data_root": "",
77
+ "media_type": "image"
78
+ },
79
+ "charades_mc_test": {
80
+ "anno_path": "your_path",
81
+ "data_root": "",
82
+ "media_type": "video"
83
+ },
84
+ "clothov1_ret_test": {
85
+ "anno_path": "your_path",
86
+ "data_root": "",
87
+ "media_type": "audio"
88
+ },
89
+ "clothov1_ret_train": {
90
+ "anno_path": "your_path",
91
+ "data_root": "",
92
+ "media_type": "audio"
93
+ },
94
+ "clothov2_ret_test": {
95
+ "anno_path": "your_path",
96
+ "data_root": "",
97
+ "media_type": "audio"
98
+ },
99
+ "clothov2_ret_train": {
100
+ "anno_path": "your_path",
101
+ "data_root": "",
102
+ "media_type": "audio"
103
+ },
104
+ "coco": {
105
+ "anno_path": "your_path",
106
+ "data_root": "",
107
+ "jump_filter": true,
108
+ "media_type": "image"
109
+ },
110
+ "data_25m": [
111
+ {
112
+ "anno_path": "your_path",
113
+ "data_root": "",
114
+ "media_type": "video"
115
+ },
116
+ {
117
+ "anno_path": "your_path",
118
+ "data_root": "",
119
+ "media_type": "image"
120
+ },
121
+ {
122
+ "anno_path": "your_path",
123
+ "data_root": "",
124
+ "jump_filter": true,
125
+ "media_type": "image"
126
+ },
127
+ {
128
+ "anno_path": "your_path",
129
+ "data_root": "",
130
+ "jump_filter": true,
131
+ "media_type": "image"
132
+ },
133
+ {
134
+ "anno_path": "your_path",
135
+ "data_root": "",
136
+ "media_type": "image"
137
+ },
138
+ {
139
+ "anno_path": "your_path",
140
+ "data_root": "",
141
+ "media_type": "image"
142
+ }
143
+ ],
144
+ "debug": [
145
+ {
146
+ "anno_path": "your_path",
147
+ "data_root": "",
148
+ "media_type": "image"
149
+ },
150
+ {
151
+ "anno_path": "your_path",
152
+ "data_root": "",
153
+ "media_type": "video"
154
+ }
155
+ ],
156
+ "didemo_ret_test": {
157
+ "anno_path": "your_path",
158
+ "data_root": "",
159
+ "is_paragraph_retrieval": true,
160
+ "max_txt_l": 64,
161
+ "media_type": "video",
162
+ "trimmed30": true
163
+ },
164
+ "didemo_ret_train": {
165
+ "anno_path": "your_path",
166
+ "data_root": "",
167
+ "is_paragraph_retrieval": true,
168
+ "max_txt_l": 64,
169
+ "media_type": "video",
170
+ "trimmed30": true
171
+ },
172
+ "didemo_ret_val": {
173
+ "anno_path": "your_path",
174
+ "data_root": "",
175
+ "is_paragraph_retrieval": true,
176
+ "max_txt_l": 64,
177
+ "media_type": "video",
178
+ "trimmed30": true
179
+ },
180
+ "hmdb51_act_val": {
181
+ "anno_path": "your_path",
182
+ "data_root": "",
183
+ "is_act_rec": true,
184
+ "media_type": "video"
185
+ },
186
+ "internvid_v1": {
187
+ "anno_path": "your_path",
188
+ "data_root": "",
189
+ "jump_filter": true,
190
+ "media_type": "video"
191
+ },
192
+ "internvid_v2_avs_private": {
193
+ "anno_path": "your_path",
194
+ "caption_augmentation": {
195
+ "caption_sample_type": "avs_all"
196
+ },
197
+ "data_root": "",
198
+ "jump_filter": true,
199
+ "media_type": "audio_video",
200
+ "read_audio_from_video": true,
201
+ "read_clip_from_video": false,
202
+ "zero_audio_padding_for_video": true
203
+ },
204
+ "k400_act_val": {
205
+ "anno_path": "your_path",
206
+ "data_root": "",
207
+ "is_act_rec": true
208
+ },
209
+ "k600_act_val": {
210
+ "anno_path": "your_path",
211
+ "data_root": "",
212
+ "is_act_rec": true,
213
+ "media_type": "video"
214
+ },
215
+ "k700_act_val": {
216
+ "anno_path": "your_path",
217
+ "data_root": "",
218
+ "is_act_rec": true,
219
+ "media_type": "video"
220
+ },
221
+ "laion_2b": {
222
+ "anno_path": "your_path",
223
+ "data_root": "",
224
+ "jump_filter": true,
225
+ "media_type": "image"
226
+ },
227
+ "laion_coco": {
228
+ "anno_path": "your_path",
229
+ "data_root": "",
230
+ "jump_filter": true,
231
+ "media_type": "image"
232
+ },
233
+ "laion_pop": {
234
+ "anno_path": "your_path",
235
+ "data_root": "",
236
+ "jump_filter": true,
237
+ "media_type": "image"
238
+ },
239
+ "lsmdc_ret_test_1000": {
240
+ "anno_path": "your_path",
241
+ "data_root": "",
242
+ "media_type": "video"
243
+ },
244
+ "lsmdc_ret_train": {
245
+ "anno_path": "your_path",
246
+ "data_root": "",
247
+ "max_txt_l": 96,
248
+ "media_type": "video"
249
+ },
250
+ "lsmdc_ret_val": {
251
+ "anno_path": "your_path",
252
+ "data_root": "",
253
+ "max_txt_l": 96,
254
+ "media_type": "video"
255
+ },
256
+ "mit_act_val": {
257
+ "anno_path": "your_path",
258
+ "data_root": "",
259
+ "is_act_rec": true,
260
+ "media_type": "video"
261
+ },
262
+ "msrvtt_1k_test": {
263
+ "anno_path": "your_path",
264
+ "data_root": "",
265
+ "media_type": "video"
266
+ },
267
+ "msrvtt_ret_test1k": {
268
+ "anno_path": "your_path",
269
+ "data_root": "",
270
+ "media_type": "video"
271
+ },
272
+ "msrvtt_ret_train9k": {
273
+ "anno_path": "your_path",
274
+ "data_root": "",
275
+ "media_type": "video"
276
+ },
277
+ "msvd_ret_test": {
278
+ "anno_path": "your_path",
279
+ "data_root": "",
280
+ "max_txt_l": 64,
281
+ "media_type": "video"
282
+ },
283
+ "msvd_ret_train": {
284
+ "anno_path": "your_path",
285
+ "data_root": "",
286
+ "has_multi_txt_gt": true,
287
+ "max_txt_l": 64,
288
+ "media_type": "video"
289
+ },
290
+ "msvd_ret_val": {
291
+ "anno_path": "your_path",
292
+ "data_root": "",
293
+ "max_txt_l": 64,
294
+ "media_type": "video"
295
+ },
296
+ "pretrain_example_data_1B": [
297
+ {
298
+ "anno_path": "your_path",
299
+ "data_root": "",
300
+ "media_type": "image"
301
+ },
302
+ {
303
+ "anno_path": "your_path",
304
+ "data_root": "",
305
+ "media_type": "video"
306
+ }
307
+ ],
308
+ "pretrain_example_data_6B": [
309
+ {
310
+ "anno_path": "your_path",
311
+ "data_root": "",
312
+ "media_type": "image"
313
+ },
314
+ {
315
+ "anno_path": "your_path",
316
+ "data_root": "",
317
+ "media_type": "video"
318
+ },
319
+ {
320
+ "anno_path": "your_path",
321
+ "caption_augmentation": {
322
+ "caption_sample_type": "avs_all"
323
+ },
324
+ "data_root": "",
325
+ "jump_filter": true,
326
+ "media_type": "audio_video",
327
+ "read_audio_from_video": true,
328
+ "read_clip_from_video": false,
329
+ "zero_audio_padding_for_video": true
330
+ }
331
+ ],
332
+ "sbu": {
333
+ "anno_path": "your_path",
334
+ "data_root": "",
335
+ "media_type": "image"
336
+ },
337
+ "ssv2_mc_val": {
338
+ "anno_path": "your_path",
339
+ "data_root": "",
340
+ "media_type": "video"
341
+ },
342
+ "ucf101_act_val": {
343
+ "anno_path": "your_path",
344
+ "data_root": "",
345
+ "is_act_rec": true,
346
+ "media_type": "video"
347
+ },
348
+ "vatex_ch_ret_val": {
349
+ "anno_path": "your_path",
350
+ "data_root": "",
351
+ "media_type": "video"
352
+ },
353
+ "vatex_en_ret_train": {
354
+ "anno_path": "your_path",
355
+ "data_root": "",
356
+ "has_multi_txt_gt": true,
357
+ "media_type": "video"
358
+ },
359
+ "vatex_en_ret_val": {
360
+ "anno_path": "your_path",
361
+ "data_root": "",
362
+ "media_type": "video"
363
+ },
364
+ "vg": {
365
+ "anno_path": "your_path",
366
+ "data_root": "",
367
+ "jump_filter": true,
368
+ "media_type": "image"
369
+ },
370
+ "wavcaps_400k": {
371
+ "anno_path": "your_path",
372
+ "data_root": "",
373
+ "media_type": "audio"
374
+ },
375
+ "webvid": {
376
+ "anno_path": "your_path",
377
+ "data_root": "",
378
+ "media_type": "video"
379
+ },
380
+ "webvid_10m": {
381
+ "anno_path": "your_path",
382
+ "data_root": "",
383
+ "media_type": "video"
384
+ },
385
+ "webvid_debug": {
386
+ "anno_path": "your_path",
387
+ "data_root": "",
388
+ "media_type": "video"
389
+ },
390
+ "webvid_fuse_10m": {
391
+ "anno_path": "your_path",
392
+ "data_root": "",
393
+ "jump_filter": true,
394
+ "media_type": "video"
395
+ }
396
+ },
397
+ "batch_size": 8,
398
+ "batch_size_test": 4,
399
+ "compile_model": false,
400
+ "debug": false,
401
+ "deep_fusion": false,
402
+ "deepspeed": {
403
+ "enable": true,
404
+ "stage": 1
405
+ },
406
+ "device": "cuda",
407
+ "dist_url": "env://",
408
+ "evaluate": true,
409
+ "evaluation": {
410
+ "eval_frame_ensemble": "concat",
411
+ "eval_offload": true,
412
+ "eval_x_only": false,
413
+ "k_test": 128
414
+ },
415
+ "gradient_checkpointing": true,
416
+ "inputs": {
417
+ "batch_size": {
418
+ "image": 8,
419
+ "video": 8
420
+ },
421
+ "batch_size_test": {
422
+ "image": 4,
423
+ "video": 4
424
+ },
425
+ "image_res": 224,
426
+ "max_txt_l": {
427
+ "image": 40,
428
+ "video": 40
429
+ },
430
+ "video_input": {
431
+ "num_frames": 4,
432
+ "num_frames_test": 4,
433
+ "random_aug": false,
434
+ "sample_type": "rand",
435
+ "sample_type_test": "middle"
436
+ }
437
+ },
438
+ "jump_evaluate": false,
439
+ "log_freq": 100,
440
+ "max_txt_l": 40,
441
+ "mode": "pt",
442
+ "model": {
443
+ "embed_dim": 512,
444
+ "find_unused_parameters": false,
445
+ "model_cls": "InternVideo2_Stage2",
446
+ "multimodal": {
447
+ "enable": true
448
+ },
449
+ "temp": 0.07,
450
+ "text_encoder": {
451
+ "config": "configs/config_bert_large.json",
452
+ "d_model": 1024,
453
+ "fusion_layer": 19,
454
+ "name": "bert_large",
455
+ "pretrained": "bert-large-uncased"
456
+ },
457
+ "vision_encoder": {
458
+ "checkpoint_num": 40,
459
+ "clip_embed_dim": 768,
460
+ "clip_input_resolution": 224,
461
+ "clip_norm_type": "l2",
462
+ "clip_return_layer": 6,
463
+ "clip_student_return_interval": 1,
464
+ "clip_teacher": null,
465
+ "clip_teacher_embed_dim": 3200,
466
+ "clip_teacher_final_dim": 768,
467
+ "clip_teacher_return_interval": 1,
468
+ "d_model": 1408,
469
+ "image_mask_ratio": 0.5,
470
+ "image_mask_type": "random",
471
+ "img_size": 224,
472
+ "keep_temporal": false,
473
+ "name": "pretrain_internvideo2_1b_patch14_224",
474
+ "num_frames": 4,
475
+ "only_mask": true,
476
+ "patch_size": 14,
477
+ "pretrained": "",
478
+ "sep_image_video_pos_embed": true,
479
+ "tubelet_size": 1,
480
+ "use_checkpoint": true,
481
+ "use_flash_attn": false,
482
+ "use_fused_mlp": false,
483
+ "use_fused_rmsnorm": false,
484
+ "video_mask_ratio": 0.8,
485
+ "video_mask_type": "random"
486
+ }
487
+ },
488
+ "num_frames": 4,
489
+ "num_frames_test": 4,
490
+ "num_workers": 6,
491
+ "origin_num_frames": 4,
492
+ "output_dir": null,
493
+ "resume": false,
494
+ "save_latest": false,
495
+ "seed": 42,
496
+ "size_t": 224,
497
+ "text_enc": "bert_large",
498
+ "torch_dtype": "float32",
499
+ "transformers_version": "4.37.2",
500
+ "use_bf16": false,
501
+ "use_flash_sdp": false,
502
+ "use_half_precision": false,
503
+ "use_mem_efficient_sdp": false
504
+ }