orange-sk committed on
Commit
378c48e
·
verified ·
1 Parent(s): 162bbe3

Upload 9 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
added_tokens.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "<|endoftext|>": 151643,
3
+ "<|im_end|>": 151645,
4
+ "<|im_start|>": 151644
5
+ }
config.json ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/ossfs/workspace/nas2/ViLAMP/llava-qwen-lv-exp13-s3-seq_avg_pooling_bilinear_2x2_siglip_085_n1_step15000",
3
+ "activate_frame_selector": true,
4
+ "activate_pre_fusion": false,
5
+ "add_faster_video": false,
6
+ "add_time_instruction": false,
7
+ "architectures": [
8
+ "LlavaQwenForCausalLM"
9
+ ],
10
+ "attention_dropout": 0.0,
11
+ "bos_token_id": 151643,
12
+ "clip_model_path": "clip-ViT-B-32/0_CLIPModel",
13
+ "cluster_num": 64,
14
+ "composer_proj_dim": 512,
15
+ "composer_proj_mlp_depth": 2,
16
+ "composer_type": "average_pooling",
17
+ "eos_token_id": 151645,
18
+ "f2f_thrd": 0.85,
19
+ "f2t_thrd": -1,
20
+ "force_sample": true,
21
+ "frame_selector_type": "SeqFrameSelector",
22
+ "hidden_act": "silu",
23
+ "hidden_size": 3584,
24
+ "image_aspect_ratio": "anyres_max_9",
25
+ "image_crop_resolution": null,
26
+ "image_grid_pinpoints": [
27
+ [
28
+ 384,
29
+ 384
30
+ ],
31
+ [
32
+ 384,
33
+ 768
34
+ ],
35
+ [
36
+ 384,
37
+ 1152
38
+ ],
39
+ [
40
+ 384,
41
+ 1536
42
+ ],
43
+ [
44
+ 384,
45
+ 1920
46
+ ],
47
+ [
48
+ 384,
49
+ 2304
50
+ ],
51
+ [
52
+ 768,
53
+ 384
54
+ ],
55
+ [
56
+ 768,
57
+ 768
58
+ ],
59
+ [
60
+ 768,
61
+ 1152
62
+ ],
63
+ [
64
+ 768,
65
+ 1536
66
+ ],
67
+ [
68
+ 768,
69
+ 1920
70
+ ],
71
+ [
72
+ 768,
73
+ 2304
74
+ ],
75
+ [
76
+ 1152,
77
+ 384
78
+ ],
79
+ [
80
+ 1152,
81
+ 768
82
+ ],
83
+ [
84
+ 1152,
85
+ 1152
86
+ ],
87
+ [
88
+ 1152,
89
+ 1536
90
+ ],
91
+ [
92
+ 1152,
93
+ 1920
94
+ ],
95
+ [
96
+ 1152,
97
+ 2304
98
+ ],
99
+ [
100
+ 1536,
101
+ 384
102
+ ],
103
+ [
104
+ 1536,
105
+ 768
106
+ ],
107
+ [
108
+ 1536,
109
+ 1152
110
+ ],
111
+ [
112
+ 1536,
113
+ 1536
114
+ ],
115
+ [
116
+ 1536,
117
+ 1920
118
+ ],
119
+ [
120
+ 1536,
121
+ 2304
122
+ ],
123
+ [
124
+ 1920,
125
+ 384
126
+ ],
127
+ [
128
+ 1920,
129
+ 768
130
+ ],
131
+ [
132
+ 1920,
133
+ 1152
134
+ ],
135
+ [
136
+ 1920,
137
+ 1536
138
+ ],
139
+ [
140
+ 1920,
141
+ 1920
142
+ ],
143
+ [
144
+ 1920,
145
+ 2304
146
+ ],
147
+ [
148
+ 2304,
149
+ 384
150
+ ],
151
+ [
152
+ 2304,
153
+ 768
154
+ ],
155
+ [
156
+ 2304,
157
+ 1152
158
+ ],
159
+ [
160
+ 2304,
161
+ 1536
162
+ ],
163
+ [
164
+ 2304,
165
+ 1920
166
+ ],
167
+ [
168
+ 2304,
169
+ 2304
170
+ ]
171
+ ],
172
+ "image_split_resolution": null,
173
+ "image_token_index": 151646,
174
+ "initializer_range": 0.02,
175
+ "intermediate_size": 18944,
176
+ "max_frame_num": 32,
177
+ "max_position_embeddings": 32768,
178
+ "max_window_layers": 28,
179
+ "mm_hidden_size": 1152,
180
+ "mm_newline_position": "one_token",
181
+ "mm_patch_merge_type": "spatial_unpad",
182
+ "mm_projector_lr": null,
183
+ "mm_projector_type": "mlp2x_gelu",
184
+ "mm_resampler_type": null,
185
+ "mm_spatial_pool_mode": "bilinear",
186
+ "mm_tunable_parts": "mm_language_model,mm_mlp_adapter",
187
+ "mm_use_im_patch_token": false,
188
+ "mm_use_im_start_end": false,
189
+ "mm_vision_select_feature": "patch",
190
+ "mm_vision_select_layer": -2,
191
+ "mm_vision_tower": "siglip-so400m-patch14-384",
192
+ "mm_vision_tower_lr": 2e-06,
193
+ "model_type": "llava_qwen",
194
+ "num_attention_heads": 28,
195
+ "num_hidden_layers": 28,
196
+ "num_key_value_heads": 4,
197
+ "pos_skipping_range": 4096,
198
+ "rms_norm_eps": 1e-06,
199
+ "rope_scaling": null,
200
+ "rope_theta": 1000000.0,
201
+ "sliding_window": null,
202
+ "tie_word_embeddings": false,
203
+ "tokenizer_model_max_length": 32768,
204
+ "tokenizer_padding_side": "right",
205
+ "torch_dtype": "bfloat16",
206
+ "transformers_version": "4.45.0",
207
+ "use_cache": true,
208
+ "use_mm_proj": true,
209
+ "use_pos_skipping": false,
210
+ "use_sliding_window": false,
211
+ "vision_tower_pretrained": null,
212
+ "vocab_size": 151647
213
+ }
generation_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attn_implementation": "flash_attention_2",
3
+ "bos_token_id": 151643,
4
+ "do_sample": true,
5
+ "eos_token_id": [
6
+ 151645,
7
+ 151643
8
+ ],
9
+ "pad_token_id": 151643,
10
+ "repetition_penalty": 1.05,
11
+ "temperature": 0.7,
12
+ "top_k": 20,
13
+ "top_p": 0.8,
14
+ "transformers_version": "4.45.0"
15
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>"
5
+ ],
6
+ "eos_token": {
7
+ "content": "<|im_end|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "pad_token": {
14
+ "content": "<|endoftext|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ }
20
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcfe42da0a4497e8b2b172c1f9f4ec423a46dc12907f4349c55025f670422ba9
3
+ size 11418266
tokenizer_config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "151643": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "151644": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "151645": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ }
28
+ },
29
+ "additional_special_tokens": [
30
+ "<|im_start|>",
31
+ "<|im_end|>"
32
+ ],
33
+ "bos_token": null,
34
+ "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
35
+ "clean_up_tokenization_spaces": false,
36
+ "eos_token": "<|im_end|>",
37
+ "errors": "replace",
38
+ "model_max_length": 32768,
39
+ "pad_token": "<|endoftext|>",
40
+ "padding_side": "right",
41
+ "split_special_tokens": false,
42
+ "tokenizer_class": "Qwen2Tokenizer",
43
+ "unk_token": null
44
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff