Upload folder using huggingface_hub

Browse files

Files changed (11) hide show

.gitattributes +1 -0
README.md +104 -0
added_tokens.json +38 -0
config.json +47 -0
generation_config.json +4 -0
merges.txt +0 -0
model.safetensors +3 -0
special_tokens_map.json +23 -0
tokenizer.json +3 -0
tokenizer_config.json +301 -0
vocab.json +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,104 @@

+---
+library_name: transformers
+pipeline_tag: text-generation
+inference: true
+widget:
+  - text: Hello!
+    example_title: Hello world
+    group: Python
+---
+This tiny model is for debugging. It is randomly initialized with a config adapted from [MiniMaxAI/MiniMax-M1-80k](https://huggingface.co/MiniMaxAI/MiniMax-M1-80k).
+### Example usage:
+```python
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+model_id = "yujiepan/minimax-m1-tiny-random"  # the tiny, randomly initialized debugging checkpoint described above
+tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    torch_dtype=torch.bfloat16,  # same dtype as the "torch_dtype" recorded in this repo's config.json
+    trust_remote_code=True,  # config.json's auto_map points at modeling code hosted in MiniMaxAI/MiniMax-M1-80k
+)
+pipe = pipeline('text-generation', model=model, tokenizer=tokenizer, trust_remote_code=True)
+print(pipe('Write an article about Artificial Intelligence.'))  # weights are random, so the output only proves the pipeline runs
+```
+### Code to create this repo:
+```python
+import json
+from pathlib import Path
+import torch
+import accelerate
+from huggingface_hub import file_exists, hf_hub_download
+from transformers import (
+    AutoConfig,
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    GenerationConfig,
+    set_seed,
+)
+source_model_id = "MiniMaxAI/MiniMax-M1-80k"  # upstream repo that supplies the tokenizer, config.json and remote code
+save_folder = "/tmp/yujiepan/minimax-m1-tiny-random"  # local output folder for every file written below
+processor = AutoTokenizer.from_pretrained(source_model_id)  # named "processor", but this is the source tokenizer
+processor.save_pretrained(save_folder)  # presumably also creates save_folder, which the config write below relies on - TODO confirm
+with open(hf_hub_download(source_model_id, filename='config.json', repo_type='model'), 'r', encoding='utf-8') as f:
+    config_json = json.load(f)  # raw source config as a plain dict, edited key by key below
+config_json["attn_type_list"] = [0, 1]  # one lightning, one attention
+for k, v in config_json['auto_map'].items():
+    config_json['auto_map'][k] = f'{source_model_id}--{v}'  # qualify each entry as "<source repo>--<module>.<Class>"
+config_json['head_dim'] = 32  # this and the following overrides shrink the architecture to a tiny size
+config_json['hidden_size'] = 64
+config_json['intermediate_size'] = 128
+config_json['num_attention_heads'] = 2
+config_json['num_experts_per_tok'] = 2
+config_json['num_hidden_layers'] = 2  # matches the two entries in attn_type_list above
+config_json['num_key_value_heads'] = 1
+config_json['num_local_experts'] = 8
+config_json['rotary_dim'] = 16
+config_json['tie_word_embeddings'] = True
+with open(f"{save_folder}/config.json", "w", encoding='utf-8') as f:
+    json.dump(config_json, f, indent=2)  # persist the edited config so AutoConfig can load it from save_folder
+config = AutoConfig.from_pretrained(
+    save_folder,  # reload the edited config.json written just above
+    trust_remote_code=True,
+)
+print(config)
+automap = config_json['auto_map']  # keep the repo-qualified auto_map; it is written back after save_pretrained
+torch.set_default_dtype(torch.bfloat16)  # bfloat16 is the default dtype only while the model is instantiated
+model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)  # built from the config alone, not from a checkpoint
+torch.set_default_dtype(torch.float32)  # switch the global default back to float32
+if file_exists(filename="generation_config.json", repo_id=source_model_id, repo_type='model'):  # only if the source repo ships one
+    model.generation_config = GenerationConfig.from_pretrained(
+        source_model_id, trust_remote_code=True,
+    )
+set_seed(42)  # fixed seed so the random weights drawn below are reproducible
+model = model.cpu()  # cpu is more stable for random initialization across machines
+with torch.no_grad():
+    for name, p in sorted(model.named_parameters()):  # visit parameters in name order so values are drawn in a fixed order
+        torch.nn.init.normal_(p, 0, 0.2)  # overwrite in place with normal samples: mean 0, std 0.2
+        print(name, p.shape)
+model.save_pretrained(save_folder)
+print(model)
+with open(f"{save_folder}/config.json", "r", encoding='utf-8') as f:
+    config_json = json.load(f)  # re-read the config.json as it exists after save_pretrained
+    config_json['auto_map'] = automap  # put back the repo-qualified auto_map (presumably save_pretrained rewrote it - TODO confirm)
+with open(f"{save_folder}/config.json", "w", encoding='utf-8') as f:
+    json.dump(config_json, f, indent=2)
+for python_file in Path(save_folder).glob('*.py'):  # every top-level *.py file in save_folder
+    python_file.unlink()  # delete it; presumably a local copy of the remote code, which auto_map now resolves from the source repo
+```

added_tokens.json ADDED Viewed

	@@ -0,0 +1,38 @@

+{
+  "<begin_of_document>": 200034,
+  "<beginning_of_sentence>": 200019,
+  "<code_interpreter>": 200023,
+  "<commit_after>": 200018,
+  "<commit_before>": 200016,
+  "<commit_msg>": 200017,
+  "<empty_output>": 200015,
+  "<end_of_document>": 200021,
+  "<end_of_image>": 200030,
+  "<end_of_sentence>": 200020,
+  "<end_of_speech>": 200028,
+  "<end_of_video>": 200032,
+  "<filename>": 200006,
+  "<fim_middle>": 200002,
+  "<fim_pad>": 200004,
+  "<fim_prefix>": 200001,
+  "<fim_suffix>": 200003,
+  "<function_call>": 200022,
+  "<gh_stars>": 200007,
+  "<image>": 200025,
+  "<issue_closed>": 200010,
+  "<issue_comment>": 200009,
+  "<issue_start>": 200008,
+  "<jupyter_code>": 200013,
+  "<jupyter_error>": 200035,
+  "<jupyter_output>": 200014,
+  "<jupyter_start>": 200011,
+  "<jupyter_text>": 200012,
+  "<pad>": 200000,
+  "<reponame>": 200005,
+  "<speech>": 200024,
+  "<start_of_image>": 200029,
+  "<start_of_speech>": 200027,
+  "<start_of_video>": 200031,
+  "<video>": 200026,
+  "<vision_pad>": 200033
+}

config.json ADDED Viewed

	@@ -0,0 +1,47 @@

+{
+  "architectures": [
+    "MiniMaxM1ForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "attn_type_list": [
+    0,
+    1
+  ],
+  "auto_map": {
+    "AutoConfig": "MiniMaxAI/MiniMax-M1-80k--configuration_minimax_m1.MiniMaxM1Config",
+    "AutoModelForCausalLM": "MiniMaxAI/MiniMax-M1-80k--modeling_minimax_m1.MiniMaxM1ForCausalLM"
+  },
+  "head_dim": 32,
+  "hidden_act": "silu",
+  "hidden_size": 64,
+  "initializer_range": 0.02,
+  "intermediate_size": 128,
+  "layernorm_full_attention_alpha": 3.5565588200778455,
+  "layernorm_full_attention_beta": 1.0,
+  "layernorm_linear_attention_alpha": 3.5565588200778455,
+  "layernorm_linear_attention_beta": 1.0,
+  "layernorm_mlp_alpha": 3.5565588200778455,
+  "layernorm_mlp_beta": 1.0,
+  "max_position_embeddings": 10240000,
+  "model_type": "MiniMaxM1",
+  "num_attention_heads": 2,
+  "num_experts_per_tok": 2,
+  "num_hidden_layers": 2,
+  "num_key_value_heads": 1,
+  "num_local_experts": 8,
+  "output_router_logits": false,
+  "postnorm": true,
+  "rms_norm_eps": 1e-05,
+  "rope_theta": 10000000,
+  "rotary_dim": 16,
+  "router_aux_loss_coef": 0.001,
+  "router_jitter_noise": 0.0,
+  "shared_intermediate_size": 0,
+  "shared_moe_mode": "sigmoid",
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.3",
+  "use_cache": true,
+  "vocab_size": 200064
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "_from_model_config": true,
+  "transformers_version": "4.51.3"
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a6581a14a0cd32f179fec72b1066e97d80ea5ee170199d4ea5de11725807b1fa
+size 26470640

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "bos_token": {
+    "content": "<beginning_of_sentence>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<end_of_sentence>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<end_of_document>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9a158429317db5b1720d40da8c461142444eb5098a4e1a1182dbd9f50bc32da6
+size 15519354

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,301 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "200000": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200001": {
+      "content": "<fim_prefix>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200002": {
+      "content": "<fim_middle>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200003": {
+      "content": "<fim_suffix>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200004": {
+      "content": "<fim_pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200005": {
+      "content": "<reponame>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200006": {
+      "content": "<filename>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200007": {
+      "content": "<gh_stars>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200008": {
+      "content": "<issue_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200009": {
+      "content": "<issue_comment>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200010": {
+      "content": "<issue_closed>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200011": {
+      "content": "<jupyter_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200012": {
+      "content": "<jupyter_text>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200013": {
+      "content": "<jupyter_code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200014": {
+      "content": "<jupyter_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200015": {
+      "content": "<empty_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200016": {
+      "content": "<commit_before>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200017": {
+      "content": "<commit_msg>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200018": {
+      "content": "<commit_after>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200019": {
+      "content": "<beginning_of_sentence>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200020": {
+      "content": "<end_of_sentence>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200021": {
+      "content": "<end_of_document>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200022": {
+      "content": "<function_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200023": {
+      "content": "<code_interpreter>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200024": {
+      "content": "<speech>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200025": {
+      "content": "<image>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200026": {
+      "content": "<video>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200027": {
+      "content": "<start_of_speech>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200028": {
+      "content": "<end_of_speech>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200029": {
+      "content": "<start_of_image>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200030": {
+      "content": "<end_of_image>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200031": {
+      "content": "<start_of_video>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200032": {
+      "content": "<end_of_video>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200033": {
+      "content": "<vision_pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200034": {
+      "content": "<begin_of_document>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200035": {
+      "content": "<jupyter_error>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<beginning_of_sentence>",
+  "chat_template": "{{ '<begin_of_document>' -}}{% set ns = namespace(system_prompt='') -%}{% for message in messages -%}{% if message['role'] == 'system' -%}{% set ns.system_prompt = ns.system_prompt + message['content'][0]['text'] -%}{% endif -%}{%- endfor -%}{% if ns.system_prompt != '' -%}{{ '<beginning_of_sentence>system ai_setting=assistant\n' + ns.system_prompt + '<end_of_sentence>\n' -}}{%- endif -%}{% if tools -%}{{ '<beginning_of_sentence>system tool_setting=tools\nYou are provided with these tools:\n<tools>\n' -}}{% for tool in tools -%}{{ tool | tojson ~ '\n' -}}{%- endfor -%}{{ '</tools>\n\nIf you need to call tools, please respond with <tool_calls></tool_calls> XML tags, and provide tool-name and json-object of arguments, following the format below:\n<tool_calls>\n{''name'': <tool-name-1>, ''arguments'': <args-json-object-1>}\n...\n</tool_calls><end_of_sentence>\n' -}}{%- endif -%}{% for message in messages -%}{% if message['role'] == 'user' -%}{{ '<beginning_of_sentence>user name=user\n' + message['content'][0]['text'] + '<end_of_sentence>\n' -}}{% elif message['role'] == 'assistant' -%}{{ '<beginning_of_sentence>ai name=assistant\n' -}}{% for content in message['content'] | selectattr('type', 'equalto', 'text') -%}{{ content['text'] -}}{%- endfor -%}{{ '<end_of_sentence>\n' -}}{% elif message['role'] == 'tool' -%}{{ '<beginning_of_sentence>tool name=tools\n' }} {%- for content in message['content'] -%}{{- 'tool name: ' + content['name'] + '\n' + 'tool result: ' + content['text'] + '\n\n' -}} {%- endfor -%}{{- '<end_of_sentence>\n' -}}{% endif -%}{%- endfor -%}{% if add_generation_prompt -%}{{ '<beginning_of_sentence>ai name=assistant\n' -}}{%- endif -%}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<end_of_sentence>",
+  "extra_special_tokens": {},
+  "model_max_length": 40960000,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<end_of_document>"
+}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff