Howieeeee committed on
Commit
477153a
·
verified ·
1 Parent(s): 2431a67

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ llava_onevision_arch.png filter=lfs diff=lfs merge=lfs -text
added_tokens.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "<image>": 151646,
3
+ "<video>": 151647,
4
+ "<|endoftext|>": 151643,
5
+ "<|im_end|>": 151645,
6
+ "<|im_start|>": 151644
7
+ }
chat_template.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + ' '}}{# Render all images first #}{% for content in message['content'] | selectattr('type', 'equalto', 'image') %}{{ '<image>' }}{% endfor %}{# Render all video then #}{% for content in message['content'] | selectattr('type', 'equalto', 'video') %}{{ '<video>' }}{% endfor %}{# Render all text next #}{% if message['role'] != 'assistant' %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{{ '\n' + content['text'] }}{% endfor %}{% else %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{% generation %}{{ '\n' + content['text'] }}{% endgeneration %}{% endfor %}{% endif %}{{'<|im_end|>'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
3
+ }
config.json ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/raid/raushan/ov-500",
3
+ "architectures": [
4
+ "LlavaOnevisionForConditionalGeneration"
5
+ ],
6
+ "ignore_index": -100,
7
+ "image_grid_pinpoints": [
8
+ [
9
+ 384,
10
+ 384
11
+ ],
12
+ [
13
+ 384,
14
+ 768
15
+ ],
16
+ [
17
+ 384,
18
+ 1152
19
+ ],
20
+ [
21
+ 384,
22
+ 1536
23
+ ],
24
+ [
25
+ 384,
26
+ 1920
27
+ ],
28
+ [
29
+ 384,
30
+ 2304
31
+ ],
32
+ [
33
+ 768,
34
+ 384
35
+ ],
36
+ [
37
+ 768,
38
+ 768
39
+ ],
40
+ [
41
+ 768,
42
+ 1152
43
+ ],
44
+ [
45
+ 768,
46
+ 1536
47
+ ],
48
+ [
49
+ 768,
50
+ 1920
51
+ ],
52
+ [
53
+ 768,
54
+ 2304
55
+ ],
56
+ [
57
+ 1152,
58
+ 384
59
+ ],
60
+ [
61
+ 1152,
62
+ 768
63
+ ],
64
+ [
65
+ 1152,
66
+ 1152
67
+ ],
68
+ [
69
+ 1152,
70
+ 1536
71
+ ],
72
+ [
73
+ 1152,
74
+ 1920
75
+ ],
76
+ [
77
+ 1152,
78
+ 2304
79
+ ],
80
+ [
81
+ 1536,
82
+ 384
83
+ ],
84
+ [
85
+ 1536,
86
+ 768
87
+ ],
88
+ [
89
+ 1536,
90
+ 1152
91
+ ],
92
+ [
93
+ 1536,
94
+ 1536
95
+ ],
96
+ [
97
+ 1536,
98
+ 1920
99
+ ],
100
+ [
101
+ 1536,
102
+ 2304
103
+ ],
104
+ [
105
+ 1920,
106
+ 384
107
+ ],
108
+ [
109
+ 1920,
110
+ 768
111
+ ],
112
+ [
113
+ 1920,
114
+ 1152
115
+ ],
116
+ [
117
+ 1920,
118
+ 1536
119
+ ],
120
+ [
121
+ 1920,
122
+ 1920
123
+ ],
124
+ [
125
+ 1920,
126
+ 2304
127
+ ],
128
+ [
129
+ 2304,
130
+ 384
131
+ ],
132
+ [
133
+ 2304,
134
+ 768
135
+ ],
136
+ [
137
+ 2304,
138
+ 1152
139
+ ],
140
+ [
141
+ 2304,
142
+ 1536
143
+ ],
144
+ [
145
+ 2304,
146
+ 1920
147
+ ],
148
+ [
149
+ 2304,
150
+ 2304
151
+ ]
152
+ ],
153
+ "image_token_index": 151646,
154
+ "model_type": "llava_onevision",
155
+ "projector_hidden_act": "gelu",
156
+ "text_config": {
157
+ "_name_or_path": "Qwen/Qwen2-0.5B-Instruct",
158
+ "architectures": [
159
+ "Qwen2ForCausalLM"
160
+ ],
161
+ "bos_token_id": 151643,
162
+ "eos_token_id": 151645,
163
+ "hidden_size": 896,
164
+ "intermediate_size": 4864,
165
+ "max_window_layers": 24,
166
+ "model_type": "qwen2",
167
+ "num_attention_heads": 14,
168
+ "num_hidden_layers": 24,
169
+ "num_key_value_heads": 2,
170
+ "rope_theta": 1000000.0,
171
+ "tie_word_embeddings": true,
172
+ "torch_dtype": "bfloat16",
173
+ "vocab_size": 152000
174
+ },
175
+ "tie_word_embeddings": false,
176
+ "torch_dtype": "float16",
177
+ "transformers_version": "4.45.0.dev0",
178
+ "use_image_newline_parameter": true,
179
+ "video_token_index": 151647,
180
+ "vision_aspect_ratio": "anyres_max_9",
181
+ "vision_config": {
182
+ "hidden_size": 1152,
183
+ "image_size": 384,
184
+ "intermediate_size": 4304,
185
+ "model_type": "siglip_vision_model",
186
+ "num_attention_heads": 16,
187
+ "num_hidden_layers": 26,
188
+ "patch_size": 14,
189
+ "vision_use_head": false
190
+ },
191
+ "vision_feature_layer": -1,
192
+ "vision_feature_select_strategy": "full"
193
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 151643,
4
+ "eos_token_id": 151645,
5
+ "transformers_version": "4.45.0.dev0"
6
+ }
llava_onevision_arch.png ADDED

Git LFS Details

  • SHA256: 12a48d195ed4c07ffabac4b27aacb9cd79f8bd089e3ebe06cd1fee28e71fa98e
  • Pointer size: 131 Bytes
  • Size of remote file: 209 kB
merges.txt ADDED
The diff for this file is too large to render. See raw diff