danielhanchen committed on
Commit d4836ab · verified · 1 Parent(s): 8215906

Add files using upload-large-folder tool

config.json CHANGED
@@ -3,20 +3,26 @@
     "Llama4ForConditionalGeneration"
   ],
   "boi_token_index": 200080,
+  "bos_token_id": 200000,
   "eoi_token_index": 200081,
+  "eos_token_id": 200008,
   "image_token_index": 200092,
   "model_type": "llama4",
+  "pad_token_id": 200018,
   "text_config": {
     "_attn_implementation_autoset": true,
     "attention_bias": false,
     "attention_chunk_size": 8192,
     "attention_dropout": 0.0,
+    "attn_scale": 0.1,
+    "attn_temperature_tuning": 4,
     "bos_token_id": 200000,
     "eos_token_id": [
       200001,
       200007,
       200008
     ],
+    "floor_scale": 8192,
     "for_llm_compressor": false,
     "head_dim": 128,
     "hidden_act": "silu",
@@ -27,7 +33,106 @@
     "intermediate_size_mlp": 16384,
     "max_position_embeddings": 10485760,
     "model_type": "llama4_text",
-    "no_rope_layers": [],
+    "moe_layers": [
+      0,
+      1,
+      2,
+      3,
+      4,
+      5,
+      6,
+      7,
+      8,
+      9,
+      10,
+      11,
+      12,
+      13,
+      14,
+      15,
+      16,
+      17,
+      18,
+      19,
+      20,
+      21,
+      22,
+      23,
+      24,
+      25,
+      26,
+      27,
+      28,
+      29,
+      30,
+      31,
+      32,
+      33,
+      34,
+      35,
+      36,
+      37,
+      38,
+      39,
+      40,
+      41,
+      42,
+      43,
+      44,
+      45,
+      46,
+      47
+    ],
+    "no_rope_layers": [
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0
+    ],
     "num_attention_heads": 40,
     "num_experts_per_tok": 1,
     "num_hidden_layers": 48,
@@ -37,8 +142,8 @@
     "pad_token_id": 200018,
     "rms_norm_eps": 1e-05,
     "rope_scaling": {
-      "factor": 8.0,
-      "high_freq_factor": 4.0,
+      "factor": 16.0,
+      "high_freq_factor": 1.0,
       "low_freq_factor": 1.0,
       "original_max_position_embeddings": 8192,
       "rope_type": "llama3"
@@ -51,8 +156,10 @@
     "use_qk_norm": true,
     "vocab_size": 202048
   },
+  "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.51.0.dev0",
+  "transformers_version": "4.51.0",
+  "unsloth_fixed": true,
   "vision_config": {
     "_attn_implementation_autoset": true,
     "attention_dropout": 0.0,
@@ -73,6 +180,7 @@
     "projector_input_dim": 4096,
     "projector_output_dim": 4096,
     "rope_theta": 10000,
+    "torch_dtype": "bfloat16",
     "vision_feature_layer": -1,
     "vision_feature_select_strategy": "default",
     "vision_output_dim": 4096
generation_config.json CHANGED
@@ -9,5 +9,5 @@
   "pad_token_id": 200018,
   "temperature": 0.6,
   "top_p": 0.9,
-  "transformers_version": "4.51.0.dev0"
+  "transformers_version": "4.51.0"
 }
special_tokens_map.json CHANGED
@@ -1,5 +1,23 @@
 {
-  "bos_token": "<|begin_of_text|>",
-  "eos_token": "<|eot|>",
-  "pad_token": "<|finetune_right_pad_id|>"
+  "bos_token": {
+    "content": "<|begin_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|eot|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|finetune_right_pad|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }
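
The special-token entries are expanded from bare strings into full token dicts with explicit stripping and normalization flags, and the pad token is renamed from <|finetune_right_pad_id|> to <|finetune_right_pad|>. A small sketch to confirm the new shape; the path is again hypothetical:

import json

with open("./llama4-model/special_tokens_map.json") as f:
    tokens = json.load(f)

for name in ("bos_token", "eos_token", "pad_token"):
    entry = tokens[name]
    # Each special token is now a dict rather than a bare string.
    assert set(entry) == {"content", "lstrip", "normalized", "rstrip", "single_word"}
    assert entry["normalized"] is False

# The pad token now matches the name used in tokenizer_config.json.
assert tokens["pad_token"]["content"] == "<|finetune_right_pad|>"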
tokenizer_config.json CHANGED
@@ -1,4 +1,5 @@
 {
+  "add_bos_token": true,
   "added_tokens_decoder": {
     "200000": {
       "content": "<|begin_of_text|>",
@@ -9091,7 +9092,9 @@
     "attention_mask"
   ],
   "model_max_length": 10485760,
-  "pad_token": "<|finetune_right_pad_id|>",
+  "pad_token": "<|finetune_right_pad|>",
+  "padding_side": "left",
   "processor_class": "Llama4Processor",
-  "tokenizer_class": "PreTrainedTokenizer"
+  "tokenizer_class": "PreTrainedTokenizer",
+  "unk_token": null
 }
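
The tokenizer config now sets add_bos_token, left-side padding, and an explicit null unk_token, alongside the corrected pad token name. A sketch of what those settings mean in practice, under the assumption that the checkout loads via AutoTokenizer with transformers >= 4.51.0; the path is hypothetical:

from transformers import AutoTokenizer

# Hypothetical local checkout of this repo; not a path named by the commit.
tok = AutoTokenizer.from_pretrained("./llama4-model")

# "add_bos_token": true -> single-text encodes should begin with
# <|begin_of_text|> (id 200000).
print(tok("hello")["input_ids"][0])  # expected: 200000

# "padding_side": "left" -> the shorter sequence in a batch is padded at
# the front with <|finetune_right_pad|> (id 200018), keeping the final
# token positions aligned for decoder-only generation.
batch = tok(["hi", "a much longer prompt"], padding=True)
print(batch["input_ids"][0])  # left-padded with tok.pad_token_id == 200018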