model:
  pretrain: ckpt/ek100mir.pt
  freeze_vis_backbone: true
  freeze_txt_backbone: true
  inflat_posemb: true  # false for cascade models; true for single-stage models (default: true)
  num_frames: 16
  text_prompt:
    n_ctx: 8
    use_bank: true
  visual_prompt:
    num_layers: 12
    prompt_dim: 512
    num_tokens: 128
    deep: true
    deep_shared: false
    split_st: false
    pt_spt: true
    pt_tmp: false
    style: VoP_c_pool
    n_seg: 16  # number of segments per video (n_seg=clip_length -> 1 frame/seg)
    K_s: 8  # boundary of intra-frame/inter-frame attention (VoP_f+c)
    pool:
      size: 10
data:
  dataset: ek100_mir
  # root: /data/EK100/video_ht256px
  # metadata: /data/EK100/epic-kitchens-100-annotations/retrieval_annotations/EPIC_100_retrieval_train.csv
  # metadata_val: /data/EK100/epic-kitchens-100-annotations/retrieval_annotations/EPIC_100_retrieval_test.csv
  # relevancy_path: /data/EK100/epic-kitchens-100-annotations/retrieval_annotations/relevancy/caption_relevancy_EPIC_100_retrieval_test.pkl
  root: data/ek100_mir/video
  metadata_val: data/ek100_mir/csv/{}.csv
  relevancy_path: meta/ek100_mir/relevancy_sel.npy
  narrations: meta/ek100_mir/EPIC_100_retrieval_test_sentence.csv
  clip_length: 16