drishanarora commited on 17 days ago

Commit

132217a

verified ·

1 Parent(s): c43979c

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +1 -0
README.md +253 -0
chat_template.jinja +150 -0
config.json +231 -0
model-00001-of-00050.safetensors +3 -0
model-00002-of-00050.safetensors +3 -0
model-00004-of-00050.safetensors +3 -0
model-00005-of-00050.safetensors +3 -0
model-00007-of-00050.safetensors +3 -0
model-00008-of-00050.safetensors +3 -0
model-00009-of-00050.safetensors +3 -0
model-00011-of-00050.safetensors +3 -0
model-00012-of-00050.safetensors +3 -0
model-00014-of-00050.safetensors +3 -0
model-00015-of-00050.safetensors +3 -0
model-00016-of-00050.safetensors +3 -0
model-00017-of-00050.safetensors +3 -0
model-00018-of-00050.safetensors +3 -0
model-00019-of-00050.safetensors +3 -0
model-00020-of-00050.safetensors +3 -0
model-00022-of-00050.safetensors +3 -0
model-00023-of-00050.safetensors +3 -0
model-00024-of-00050.safetensors +3 -0
model-00025-of-00050.safetensors +3 -0
model-00026-of-00050.safetensors +3 -0
model-00027-of-00050.safetensors +3 -0
model-00030-of-00050.safetensors +3 -0
model-00032-of-00050.safetensors +3 -0
model-00033-of-00050.safetensors +3 -0
model-00034-of-00050.safetensors +3 -0
model-00035-of-00050.safetensors +3 -0
model-00036-of-00050.safetensors +3 -0
model-00037-of-00050.safetensors +3 -0
model-00038-of-00050.safetensors +3 -0
model-00039-of-00050.safetensors +3 -0
model-00040-of-00050.safetensors +3 -0
model-00041-of-00050.safetensors +3 -0
model-00042-of-00050.safetensors +3 -0
model-00043-of-00050.safetensors +3 -0
model-00044-of-00050.safetensors +3 -0
model-00045-of-00050.safetensors +3 -0
model-00046-of-00050.safetensors +3 -0
model-00047-of-00050.safetensors +3 -0
model-00049-of-00050.safetensors +3 -0
model-00050-of-00050.safetensors +3 -0
model.safetensors.index.json +0 -0
preprocessor_config.json +33 -0
processor_config.json +6 -0
special_tokens_map.json +23 -0
tokenizer.json +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,253 @@

+---
+license: llama4
+library_name: transformers
+base_model:
+- meta-llama/Llama-4-Scout-17B-16E
+---
+<p align="center">
+  <img src="images/deep-cogito-logo.png" alt="Logo" width="40%">
+</p>
+# Cogito v2 preview - 109B MoE
+[Blog Post](https://www.deepcogito.com/research/cogito-v2-preview)
+The Cogito v2 LLMs are instruction tuned generative models. All models are released under an open license for commercial use.
+- Cogito v2 models are hybrid reasoning models. Each model can answer directly (standard LLM), or self-reflect before answering (like reasoning models).
+- The LLMs are trained using **Iterated Distillation and Amplification (IDA)** - a scalable and efficient alignment strategy for superintelligence using iterative self-improvement.
+- The models have been optimized for coding, STEM, instruction following and general helpfulness, and have significantly higher multilingual, coding and tool calling capabilities than size equivalent counterparts.
+  - In both standard and reasoning modes, Cogito v2-preview models outperform their size equivalent counterparts on common industry benchmarks.
+- This model is trained in over 30 languages and supports long contexts (up to 10M tokens).
+# Evaluations
+For detailed evaluations, please refer to the [Blog Post](https://www.deepcogito.com/research/cogito-v2-preview).
+# Usage
+Here is a snippet below for usage with Transformers:
+```python
+import transformers
+import torch
+model_id = "deepcogito/cogito-v2-preview-llama-109B-MoE"
+pipeline = transformers.pipeline(
+    "text-generation",
+    model=model_id,
+    model_kwargs={"torch_dtype": torch.bfloat16},
+    device_map="auto",
+)
+messages = [
+    {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
+    {"role": "user", "content": "Give me a short introduction to LLMs."},
+]
+outputs = pipeline(
+    messages,
+    max_new_tokens=512,
+)
+print(outputs[0]["generated_text"][-1])
+```
+## Implementing extended thinking
+- By default, the model will answer in the standard mode.
+- To enable thinking, use either of the following two methods:
+  - Set `enable_thinking=True` while applying the chat template.
+  - Add a specific system prompt, along with prefilling the response with "\<think\>\n".
+**NOTE: Unlike Cogito v1 models, we initiate the response with "\<think\>\n" at the beginning of every output when reasoning is enabled. This is because hybrid models can be brittle at times (<0.1% of the cases), and adding a "\<think\>\n" ensures that the model does indeed respect thinking.**
+### Method 1 - Set enable_thinking=True in the tokenizer
+If you are using Hugging Face tokenizers, then you can simply add the argument `enable_thinking=True` to the tokenization (this option is added to the chat template).
+Here is an example -
+```python
+from transformers import AutoModelForCausalLM, AutoTokenizer
+model_name = "deepcogito/cogito-v2-preview-llama-109B-MoE"
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype="auto",
+    device_map="auto"
+)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+prompt = "Give me a short introduction to LLMs."
+messages = [
+    {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
+    {"role": "user", "content": prompt}
+]
+text = tokenizer.apply_chat_template(
+    messages,
+    tokenize=False,
+    add_generation_prompt=True,
+    enable_thinking=True
+)
+model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+generated_ids = model.generate(
+    **model_inputs,
+    max_new_tokens=512
+)
+generated_ids = [
+    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+]
+response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+print(response)
+```
+### Method 2 - Add a specific system prompt, along with prefilling the response with "\<think\>\n".
+To enable thinking using this method, you need to do two things -
+Step 1 - Simply use this in the system prompt `system_instruction = 'Enable deep thinking subroutine.'`
+If you already have a system_instruction, then use `system_instruction = 'Enable deep thinking subroutine.' + '\n\n' + system_instruction`.
+Step 2 - Prefill the response with the tokens `"<think>\n"`.
+Here is an example -
+```python
+import transformers
+import torch
+model_name = "deepcogito/cogito-v2-preview-llama-109B-MoE"
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype="auto",
+    device_map="auto"
+)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+# Step 1 - Add deep thinking instruction.
+DEEP_THINKING_INSTRUCTION = "Enable deep thinking subroutine."
+messages = [
+    {"role": "system", "content": DEEP_THINKING_INSTRUCTION},
+    {"role": "user", "content": "Write a bash script that takes a matrix represented as a string with format '[1,2],[3,4],[5,6]' and prints the transpose in the same format."},
+]
+text = tokenizer.apply_chat_template(
+    messages,
+    tokenize=False,
+    add_generation_prompt=True
+)
+# Step 2 - Prefill response with "<think>\n".
+text += "<think>\n"
+# Now, continue as usual.
+model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+generated_ids = model.generate(
+    **model_inputs,
+    max_new_tokens=512
+)
+generated_ids = [
+    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+]
+response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+print(response)
+```
+Similarly, if you have a system prompt, you can prepend the `DEEP_THINKING_INSTRUCTION` to it in this way -
+```python
+DEEP_THINKING_INSTRUCTION = "Enable deep thinking subroutine."
+system_prompt = "Reply to each prompt with only the actual code - no explanations."
+prompt = "Write a bash script that takes a matrix represented as a string with format '[1,2],[3,4],[5,6]' and prints the transpose in the same format."
+messages = [
+    {"role": "system", "content": DEEP_THINKING_INSTRUCTION + '\n\n' + system_prompt},
+    {"role": "user", "content": prompt}
+]
+```
+# Tool Calling
+Cogito models support tool calling (single, parallel, multiple and parallel_multiple) both in standard and extended thinking mode.
+Here is a snippet -
+```python
+# First, define a tool
+def get_current_temperature(location: str) -> float:
+    """
+    Get the current temperature at a location.
+    Args:
+        location: The location to get the temperature for, in the format "City, Country"
+    Returns:
+        The current temperature at the specified location in the specified units, as a float.
+    """
+    return 22.  # A real function should probably actually get the temperature!
+# Next, create a chat and apply the chat template
+messages = [
+  {"role": "user", "content": "Hey, what's the temperature in Paris right now?"}
+]
+model_inputs = tokenizer.apply_chat_template(messages, tools=[get_current_temperature], add_generation_prompt=True)
+text = tokenizer.apply_chat_template(messages, tools=[get_current_temperature], add_generation_prompt=True, tokenize=False)
+inputs = tokenizer(text, return_tensors="pt", add_special_tokens=False).to(model.device)
+outputs = model.generate(**inputs, max_new_tokens=512)
+output_text = tokenizer.batch_decode(outputs)[0][len(text):]
+print(output_text)
+```
+This will result in the output -
+```
+<tool_call>
+{"name": "get_current_temperature", "arguments": {"location": "Paris, France"}}
+</tool_call><|eot|>
+```
+You can then generate text from this input as normal. If the model generates a tool call, you should add it to the chat like so:
+```python
+tool_call = {"name": "get_current_temperature", "arguments": {"location": "Paris, France"}}
+messages.append({"role": "assistant", "tool_calls": [{"type": "function", "function": tool_call}]})
+```
+and then call the tool and append the result, with the `tool` role, like so:
+```python
+messages.append({"role": "tool", "name": "get_current_temperature", "content": "22.0"})
+```
+After that, you can `generate()` again to let the model use the tool result in the chat:
+```python
+text = tokenizer.apply_chat_template(messages, tools=[get_current_temperature], add_generation_prompt=True, tokenize=False)
+inputs = tokenizer(text, return_tensors="pt", add_special_tokens=False).to(model.device)
+outputs = model.generate(**inputs, max_new_tokens=512)
+output_text = tokenizer.batch_decode(outputs)[0][len(text):]
+```
+This should result in the string -
+```
+'The current temperature in Paris is 22.0 degrees.<|eot|>'
+```
+## License
+This repository and the model weights are licensed under the [Llama 4 Community License Agreement](https://github.com/meta-llama/llama-models/blob/main/models/llama4/LICENSE) (Llama models' default license agreement).
+## Contact
+If you would like to reach out to our team, send an email to [contact@deepcogito.com](mailto:contact@deepcogito.com).

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,150 @@

+{{- bos_token }}
+{#- Fall back to defaults when the optional template variables are not supplied. #}
+{%- if not tools is defined %}
+    {%- set tools = none %}
+{%- endif %}
+{%- if not enable_thinking is defined %}
+    {%- set enable_thinking = false %}
+{%- endif %}
+{#- This block extracts the system message, so we can slot it into the right place. #}
+{%- if messages[0]['role'] == 'system' %}
+    {%- if messages[0]['content'] is string %}
+        {%- set system_message = messages[0]['content']|trim %}
+    {%- else %}
+        {%- set system_message = messages[0]['content'][0]['text']|trim %}
+    {%- endif %}
+    {%- set messages = messages[1:] %}
+{%- else %}
+    {%- set system_message = "" %}
+{%- endif %}
+{#- Set the system message. If enable_thinking is true, add the "Enable deep thinking subroutine." #}
+{%- if enable_thinking %}
+    {%- if system_message != "" %}
+        {%- set system_message = "Enable deep thinking subroutine.
+" ~ system_message %}
+    {%- else %}
+        {%- set system_message = "Enable deep thinking subroutine." %}
+    {%- endif %}
+{%- endif %}
+{#- System message + tools #}
+{%- if tools is not none or system_message != '' %}
+    {{- "<|header_start|>system<|header_end|>
+" }}
+    {{- system_message }}
+    {%- if tools is not none %}
+        {%- if system_message != "" %}
+            {{- "
+" }}
+        {%- endif %}
+        {{- "Available Tools:
+" }}
+        {%- for t in tools %}
+            {{- t | tojson(indent=4) }}
+            {{- "
+" }}
+        {%- endfor %}
+    {%- endif %}
+    {{- "<|eot|>" }}
+{%- endif %}
+{#- Rest of the messages #}
+{%- for message in messages %}
+    {#- Case 1 - Usual, non tool related message. #}
+    {%- if not (message.role == "ipython" or message.role == "tool" or message.role == "tool_results" or (message.tool_calls is defined and message.tool_calls is not none)) %}
+        {{- '<|header_start|>' + message['role'] + '<|header_end|>
+' }}
+        {%- if message['content'] is string %}
+            {{- message['content'] }}
+        {%- else %}
+            {%- for content in message['content'] %}
+                {%- if content['type'] == 'image' %}
+                    {{- '<|image|>' }}
+                {%- elif content['type'] == 'text' %}
+                    {{- content['text'] }}
+                {%- endif %}
+            {%- endfor %}
+        {%- endif %}
+        {{- "<|eot|>" }}
+    {#- Case 2 - the response is from the assistant, but has a tool call returned. #}
+    {%- elif message.tool_calls is defined and message.tool_calls is not none %}
+        {{- "<|header_start|>assistant<|header_end|>
+" }}
+        {%- if message['content'] is string %}
+            {{- message['content'] }}
+            {%- if message['content'] | trim != "" %}
+                {{- "
+" }}
+            {%- endif %}
+        {%- else %}
+            {%- for content in message['content'] %}
+                {%- if content['type'] == 'image' %}
+                    {{- '<|image|>' }}
+                {%- elif content['type'] == 'text' %}
+                    {{- content['text'] }}
+                    {%- if content['text'] | trim != "" %}
+                        {{- "
+" }}
+                    {%- endif %}
+                {%- endif %}
+            {%- endfor %}
+        {%- endif %}
+        {#- Tool calls are rendered as a bracketed, comma-separated list of JSON objects. NOTE(review): the closing bracket and end-of-turn token are only emitted on the last loop iteration, so an empty tool_calls list would leave the opening bracket unclosed - confirm callers never pass one. #}
+        {{- "[" }}
+        {%- for tool_call in message.tool_calls %}
+            {%- if tool_call.function is defined %}
+                {%- set out = tool_call.function|tojson %}
+                {%- if not tool_call.id is defined %}
+                    {{- out }}
+                {%- else %}
+                    {#- Drop the closing brace of the serialized call so the id can be appended as an extra key. #}
+                    {{- out[:-1] }}
+                    {{- ', "id": "' + tool_call.id + '"}' }}
+                {%- endif %}
+            {%- else %}
+                {{- tool_call|tojson }}
+            {%- endif %}
+            {%- if not loop.last %}
+                {{- ", " }}
+            {%- else %}
+                {{- "]<|eot|>" }}
+            {%- endif %}
+        {%- endfor %}
+    {#- Case 3 - the response is from a tool call. #}
+    {%- elif message.role == "ipython" or message["role"] == "tool_results" or message["role"] == "tool" %}
+        {{- "<|header_start|>ipython<|header_end|>
+" }}
+        {%- if message.tool_call_id is defined and message.tool_call_id != '' %}
+            {{- '{"content": ' }}
+            {%- if message.content is mapping or (message.content is iterable and not message.content is string) %}
+                {{- message.content | tojson }}
+            {%- else %}
+                {#- NOTE(review): plain content is wrapped in quotes without JSON escaping, so embedded quotes or newlines would break the payload - confirm this is intended. #}
+                {{- '"' ~ message.content ~ '"' }}
+            {%- endif %}
+            {{- ', "call_id": "' ~ message.tool_call_id ~ '"}' }}
+        {%- else %}
+            {%- if message.content is mapping or (message.content is iterable and not message.content is string) %}
+                {{- message.content | tojson }}
+            {%- else %}
+                {{- message.content }}
+            {%- endif %}
+        {%- endif %}
+        {{- "<|eot|>" }}
+    {%- endif %}
+{%- endfor %}
+{#- Generation prompt: open an assistant turn and, when enable_thinking is set, prefill the think tag so the reply begins with it. #}
+{%- if add_generation_prompt %}
+    {{- '<|header_start|>assistant<|header_end|>\n\n' }}
+    {%- if enable_thinking %}
+        {{- '<think>\n' }}
+    {%- endif %}
+{%- endif %}

config.json ADDED Viewed

	@@ -0,0 +1,231 @@

+{
+  "architectures": [
+    "Llama4ForConditionalGeneration"
+  ],
+  "boi_token_index": 200080,
+  "eoi_token_index": 200081,
+  "image_token_index": 200092,
+  "model_type": "llama4",
+  "text_config": {
+    "attention_bias": false,
+    "attention_chunk_size": 8192,
+    "attention_dropout": 0.0,
+    "attn_scale": 0.1,
+    "attn_temperature_tuning": true,
+    "bos_token_id": 200000,
+    "eos_token_id": [
+      200001,
+      200007,
+      200008
+    ],
+    "floor_scale": 8192,
+    "for_llm_compressor": false,
+    "head_dim": 128,
+    "hidden_act": "silu",
+    "hidden_size": 5120,
+    "initializer_range": 0.02,
+    "interleave_moe_layer_step": 1,
+    "intermediate_size": 8192,
+    "intermediate_size_mlp": 16384,
+    "layer_types": [
+      "chunked_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "full_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "full_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "full_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "full_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "full_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "full_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "full_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "full_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "full_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "full_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "full_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "full_attention"
+    ],
+    "max_position_embeddings": 262144,
+    "model_type": "llama4_text",
+    "moe_layers": [
+      0,
+      1,
+      2,
+      3,
+      4,
+      5,
+      6,
+      7,
+      8,
+      9,
+      10,
+      11,
+      12,
+      13,
+      14,
+      15,
+      16,
+      17,
+      18,
+      19,
+      20,
+      21,
+      22,
+      23,
+      24,
+      25,
+      26,
+      27,
+      28,
+      29,
+      30,
+      31,
+      32,
+      33,
+      34,
+      35,
+      36,
+      37,
+      38,
+      39,
+      40,
+      41,
+      42,
+      43,
+      44,
+      45,
+      46,
+      47
+    ],
+    "no_rope_layers": [
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0
+    ],
+    "num_attention_heads": 40,
+    "num_experts_per_tok": 1,
+    "num_hidden_layers": 48,
+    "num_key_value_heads": 8,
+    "num_local_experts": 16,
+    "output_router_logits": false,
+    "pad_token_id": 200018,
+    "rms_norm_eps": 1e-05,
+    "rope_scaling": {
+      "factor": 16.0,
+      "high_freq_factor": 1.0,
+      "low_freq_factor": 1.0,
+      "original_max_position_embeddings": 8192,
+      "rope_type": "llama3"
+    },
+    "rope_theta": 500000.0,
+    "router_aux_loss_coef": 0.001,
+    "router_jitter_noise": 0.0,
+    "torch_dtype": "bfloat16",
+    "use_cache": true,
+    "use_qk_norm": true,
+    "vocab_size": 201135
+  },
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.53.0",
+  "vision_config": {
+    "attention_dropout": 0.0,
+    "hidden_act": "gelu",
+    "hidden_size": 1408,
+    "image_size": 336,
+    "initializer_range": 0.02,
+    "intermediate_size": 5632,
+    "model_type": "llama4_vision_model",
+    "multi_modal_projector_bias": false,
+    "norm_eps": 1e-05,
+    "num_attention_heads": 16,
+    "num_channels": 3,
+    "num_hidden_layers": 34,
+    "patch_size": 14,
+    "pixel_shuffle_ratio": 0.5,
+    "projector_dropout": 0.0,
+    "projector_input_dim": 4096,
+    "projector_output_dim": 4096,
+    "rope_theta": 10000,
+    "vision_feature_layer": -1,
+    "vision_feature_select_strategy": "default",
+    "vision_output_dim": 4096
+  }
+}

model-00001-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:83bdfc4963f0f68d7c8bf580bcce7e2ce867386b2576583f11c0f58684bb18f0
+size 4119245064

model-00002-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e2ddfe3dd685d9b609a515052dce9e9cc7d1fb543b522a5b4cf9f3736d367c91
+size 4404205216

model-00004-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3a2d67492d970dbcdb6c4cf44831f21c8ae7be4f46cb0aeea392564c5e3a0e05
+size 4404205232

model-00005-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a9eb00047f2103fe799e461aa04ab3201392fd9fd59854d57060d7d1dd676bf3
+size 4404205232

model-00007-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c106f5b98b4dc53ece26a64d265faa2fd08a8b785ced496f829427e703745e25
+size 4404205232

model-00008-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3ddb3dfffb388a653dedad49371ff14ae06746189863f358643017b826901e66
+size 4404205232

model-00009-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d50c84c1869a0caac85b60a83b782a88927be4ceabcf8c6b1c63cabcdf2bbc33
+size 4404205232

model-00011-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7d9b801f37d644c700de204181df62d137d1eee9b582774d5def9535a9c64b7f
+size 4404205232

model-00012-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:760d1bb5b5815196d58783806c2d882092d081652f4e82ead49c0d45876e3997
+size 4404205232

model-00014-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:996780ebe1559f9d1a80b6c83f201fe3ddf8e050d4d340783f64cf11a1fbdd8e
+size 4404205216

model-00015-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:54d06cb75a42f531c997cd9c4e7bc9184365c4c7528a85abf9a3311d46bc1b66
+size 4404205232

model-00016-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2821ab1efa0c65226e91aabf6c22c86f5df704254174628024b731a079a7dbe7
+size 4404205232

model-00017-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1b936cef500a70f24625e22c270c515f2ba0d45207ea1ab2a3082eee5623f2b5
+size 4404205232

model-00018-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fd7b30a087c4225c28b471254244b1b3e4eb7d4d7e5985965f0f002c8394c852
+size 4404205232

model-00019-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2883dbea4b45ae202f070cc3451e8bf3ca29e3276a8a114a3686aa5a486eec6e
+size 4404205232

model-00020-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8e315535074f3d658e333051a26bee6b13594f75a8e3af40c8dd28134fb976c8
+size 4404205232

model-00022-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c938b92b21c0ad24fda4251edb7538fac50dbcae2e5a461bb39ce584ab02830f
+size 4404205232

model-00023-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eeded936adb0da2448fa51de25b91ef10b30eb91d6faa3cddec71c688c982cd9
+size 4404205232

model-00024-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:367514e793177b7c93c07e7f6956a5cb354c27013da0b1ba935d5763a992765f
+size 4404205232

model-00025-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8c9a9d0b72388787e1dd3abf19aabae3fcdde64381fa06ef46644a2bcfd39344
+size 4404205216

model-00026-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c2a80c8382fe0804729083cacd7a68bd2e1787ed3e9b9af50a6efde2ba8cfafa
+size 4404205232

model-00027-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:18a6e5255bbb5682b26c6aecf03d1a43774c806da9a13f645081505bda086394
+size 4404205232

model-00030-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c4900f411508266552ce10fa53fad536de2d5818fe01404494a96d595d83ca2e
+size 4404205232

model-00032-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:230929fb23ee1abb87b0250a4413624756f17bb66eebe29cb651deb80d5fbbd8
+size 4404205232

model-00033-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:93a04301c90ff419cd453a866a72c11bf062b6931bcab1090db0a5f6159d8a15
+size 4404205232

model-00034-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9edd1165e5b8edbb9c45e5eee66e2e97688488be9755e9ee0c1d52b476b48054
+size 4404205232

model-00035-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6d5c3188c3e324d69410eff7f4462b3fe0c69e4923526b5edf69302b69d18921
+size 4404205232

model-00036-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6d1b372f64dc1ba73a3f7251f96c3f4056f67b5faaf8e0dc1902d0ed3531f8a7
+size 4404205216

model-00037-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2f62fd355b56ee5fae84e2ca08af103d756e99efaad182a1cd547263c28d4559
+size 4404205232

model-00038-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bc69c3240a4da927f1c0d770e5285fbd4ba2f281c82dcd97f6c1567dcd04c8b6
+size 4404205232

model-00039-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f04f00917e0669b6adac047fa67de9195f49b67bf6e327714988addf1a601b36
+size 4404205232

model-00040-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a066fbf7c5d0d249b2bab91d0b58d2408ce92431639ecc80017e7300fb0e3652
+size 4404205232

model-00041-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:16a767873b893bcefed665ac474855099f37925151d9515777aef4a766580daa
+size 4404205232

model-00042-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:db519ea33c5f77826b4620b4807ad6c1af743964d075f542fc534897837ead29
+size 4404205232

model-00043-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:613200397e4b51734e8dc508cd585ef00d3c907a44adbecd9421e19f81a8797a
+size 4404205232

model-00044-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2a1fc08e122807c6c19d0daa8a6125ad6a27d2f0f1c56fd1da0a16f24b777ffe
+size 4404205232

model-00045-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3677a84dcd4f5edad50361deeb1e6d39c98c562f05346234d40b9b5d49466f91
+size 4404205216

model-00046-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cc701bf868ca04d2f97d80bc2976fd43840d4940930091af21ab9fc4bd060b1a
+size 4404205216

model-00047-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ccd5f7f029778833ca828bf68ea0e87ff0e35d98603252a4ba63e96d22d170c5
+size 4404205216

model-00049-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8a47fc2b1bcd500b42cba1ef212ce78e210739fd34f33a87be574058498537d7
+size 4985848592

model-00050-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:21e41a5c1b0008147fbf0378e631ab9fb44f5e97eb0c38247ee811535864fe69
+size 1162300336

model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "crop_size": null,
+  "data_format": "channels_first",
+  "default_to_square": true,
+  "device": null,
+  "do_center_crop": null,
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "image_processor_type": "Llama4ImageProcessorFast",
+  "image_std": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "input_data_format": null,
+  "max_patches": 16,
+  "processor_class": "Llama4Processor",
+  "resample": 2,
+  "rescale_factor": 0.00392156862745098,
+  "resize_to_max_canvas": false,
+  "return_tensors": null,
+  "size": {
+    "height": 336,
+    "width": 336
+  }
+}

processor_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "fake_image_token": "<|image|>",
+  "image_token": "<|image|>",
+  "patch_size": 14,
+  "processor_class": "Llama4Processor"
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "bos_token": {
+    "content": "<|begin_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|eot|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|finetune_right_pad|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:172c9eb4beafc72601690da3ccfcede5c2e6806a8d5ec1fca33e22acea8023a4
+size 27948578