danielhanchen committed on
Commit d4836ab · verified · 1 Parent(s): 8215906

Add files using upload-large-folder tool

config.json CHANGED
@@ -3,20 +3,26 @@
     "Llama4ForConditionalGeneration"
   ],
   "boi_token_index": 200080,
+  "bos_token_id": 200000,
   "eoi_token_index": 200081,
+  "eos_token_id": 200008,
   "image_token_index": 200092,
   "model_type": "llama4",
+  "pad_token_id": 200018,
   "text_config": {
     "_attn_implementation_autoset": true,
     "attention_bias": false,
     "attention_chunk_size": 8192,
     "attention_dropout": 0.0,
+    "attn_scale": 0.1,
+    "attn_temperature_tuning": 4,
     "bos_token_id": 200000,
     "eos_token_id": [
       200001,
       200007,
       200008
     ],
+    "floor_scale": 8192,
     "for_llm_compressor": false,
     "head_dim": 128,
     "hidden_act": "silu",
@@ -27,7 +33,106 @@
     "intermediate_size_mlp": 16384,
     "max_position_embeddings": 10485760,
     "model_type": "llama4_text",
-    "no_rope_layers": [],
+    "moe_layers": [
+      0,
+      1,
+      2,
+      3,
+      4,
+      5,
+      6,
+      7,
+      8,
+      9,
+      10,
+      11,
+      12,
+      13,
+      14,
+      15,
+      16,
+      17,
+      18,
+      19,
+      20,
+      21,
+      22,
+      23,
+      24,
+      25,
+      26,
+      27,
+      28,
+      29,
+      30,
+      31,
+      32,
+      33,
+      34,
+      35,
+      36,
+      37,
+      38,
+      39,
+      40,
+      41,
+      42,
+      43,
+      44,
+      45,
+      46,
+      47
+    ],
+    "no_rope_layers": [
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0
+    ],
     "num_attention_heads": 40,
     "num_experts_per_tok": 1,
     "num_hidden_layers": 48,
@@ -37,8 +142,8 @@
     "pad_token_id": 200018,
     "rms_norm_eps": 1e-05,
     "rope_scaling": {
-      "factor": 8.0,
-      "high_freq_factor": 4.0,
+      "factor": 16.0,
+      "high_freq_factor": 1.0,
       "low_freq_factor": 1.0,
       "original_max_position_embeddings": 8192,
       "rope_type": "llama3"
@@ -51,8 +156,10 @@
     "use_qk_norm": true,
     "vocab_size": 202048
   },
+  "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.51.0.dev0",
+  "transformers_version": "4.51.0",
+  "unsloth_fixed": true,
   "vision_config": {
     "_attn_implementation_autoset": true,
     "attention_dropout": 0.0,
@@ -73,6 +180,7 @@
     "projector_input_dim": 4096,
     "projector_output_dim": 4096,
     "rope_theta": 10000,
+    "torch_dtype": "bfloat16",
     "vision_feature_layer": -1,
     "vision_feature_select_strategy": "default",
     "vision_output_dim": 4096
generation_config.json CHANGED
@@ -9,5 +9,5 @@
   "pad_token_id": 200018,
   "temperature": 0.6,
   "top_p": 0.9,
-  "transformers_version": "4.51.0.dev0"
+  "transformers_version": "4.51.0"
 }
special_tokens_map.json CHANGED
@@ -1,5 +1,23 @@
 {
-  "bos_token": "<|begin_of_text|>",
-  "eos_token": "<|eot|>",
-  "pad_token": "<|finetune_right_pad_id|>"
+  "bos_token": {
+    "content": "<|begin_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|eot|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|finetune_right_pad|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }
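
The special-token entries are expanded from bare strings into full token dicts with explicit stripping and normalization flags, and the pad token is renamed from <|finetune_right_pad_id|> to <|finetune_right_pad|>. A small sketch to confirm the new shape; the path is again hypothetical:

import json

with open("./llama4-model/special_tokens_map.json") as f:
    tokens = json.load(f)

for name in ("bos_token", "eos_token", "pad_token"):
    entry = tokens[name]
    # Each special token is now a dict rather than a bare string.
    assert set(entry) == {"content", "lstrip", "normalized", "rstrip", "single_word"}
    assert entry["normalized"] is False

# The pad token now matches the name used in tokenizer_config.json.
assert tokens["pad_token"]["content"] == "<|finetune_right_pad|>"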
tokenizer_config.json CHANGED
@@ -1,4 +1,5 @@
 {
+  "add_bos_token": true,
   "added_tokens_decoder": {
     "200000": {
       "content": "<|begin_of_text|>",
@@ -9091,7 +9092,9 @@
     "attention_mask"
   ],
   "model_max_length": 10485760,
-  "pad_token": "<|finetune_right_pad_id|>",
+  "pad_token": "<|finetune_right_pad|>",
+  "padding_side": "left",
   "processor_class": "Llama4Processor",
-  "tokenizer_class": "PreTrainedTokenizer"
+  "tokenizer_class": "PreTrainedTokenizer",
+  "unk_token": null
 }
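
The tokenizer config now sets add_bos_token, left-side padding, and an explicit null unk_token, alongside the corrected pad token name. A sketch of what those settings mean in practice, under the assumption that the checkout loads via AutoTokenizer with transformers >= 4.51.0; the path is hypothetical:

from transformers import AutoTokenizer

# Hypothetical local checkout of this repo; not a path named by the commit.
tok = AutoTokenizer.from_pretrained("./llama4-model")

# "add_bos_token": true -> single-text encodes should begin with
# <|begin_of_text|> (id 200000).
print(tok("hello")["input_ids"][0])  # expected: 200000

# "padding_side": "left" -> the shorter sequence in a batch is padded at
# the front with <|finetune_right_pad|> (id 200018), keeping the final
# token positions aligned for decoder-only generation.
batch = tok(["hi", "a much longer prompt"], padding=True)
print(batch["input_ids"][0])  # left-padded with tok.pad_token_id == 200018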