yujiepan committed (verified)
Commit 75f5ab8 · 1 Parent(s): 401a310

Upload folder using huggingface_hub

Files changed (2):
  1. README.md +60 -1
  2. model.safetensors +2 -2
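
The commit message refers to the standard huggingface_hub upload flow. As a rough sketch of what such an upload typically looks like (the local folder path and target repo id below are placeholders, not taken from this commit):

```python
from huggingface_hub import upload_folder

# Placeholder values -- the actual folder path and repo id are not part of this commit.
upload_folder(
    folder_path="./local-model-folder",
    repo_id="yujiepan/some-tiny-random-model",
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```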
README.md CHANGED
@@ -28,6 +28,63 @@ pipe = pipeline('text-generation', model=model, tokenizer=tokenizer, trust_remot
 print(pipe('Write an article about Artificial Intelligence.'))
 ```
 
+### Printing the model:
+
+```text
+MiniMaxM1ForCausalLM(
+  (model): MiniMaxM1Model(
+    (embed_tokens): Embedding(200064, 64)
+    (layers): ModuleList(
+      (0): MiniMaxM1DecoderLayer(
+        (self_attn): MiniMaxM1LightningAttention(
+          (out_proj): Linear(in_features=64, out_features=64, bias=False)
+          (norm): MiniMaxM1RMSNorm()
+          (qkv_proj): Linear(in_features=64, out_features=192, bias=False)
+          (output_gate): Linear(in_features=64, out_features=64, bias=False)
+        )
+        (block_sparse_moe): MiniMaxM1SparseMoeBlock(
+          (gate): Linear(in_features=64, out_features=8, bias=False)
+          (experts): ModuleList(
+            (0-7): 8 x MiniMaxM1BlockSparseTop2MLP(
+              (w1): Linear(in_features=64, out_features=128, bias=False)
+              (w2): Linear(in_features=128, out_features=64, bias=False)
+              (w3): Linear(in_features=64, out_features=128, bias=False)
+              (act_fn): SiLU()
+            )
+          )
+        )
+        (input_layernorm): MiniMaxM1RMSNorm()
+        (post_attention_layernorm): MiniMaxM1RMSNorm()
+      )
+      (1): MiniMaxM1DecoderLayer(
+        (self_attn): MiniMaxM1FlashAttention2(
+          (q_proj): Linear(in_features=64, out_features=64, bias=False)
+          (k_proj): Linear(in_features=64, out_features=32, bias=False)
+          (v_proj): Linear(in_features=64, out_features=32, bias=False)
+          (o_proj): Linear(in_features=64, out_features=64, bias=False)
+          (rotary_emb): MiniMaxM1RotaryEmbedding()
+        )
+        (block_sparse_moe): MiniMaxM1SparseMoeBlock(
+          (gate): Linear(in_features=64, out_features=8, bias=False)
+          (experts): ModuleList(
+            (0-7): 8 x MiniMaxM1BlockSparseTop2MLP(
+              (w1): Linear(in_features=64, out_features=128, bias=False)
+              (w2): Linear(in_features=128, out_features=64, bias=False)
+              (w3): Linear(in_features=64, out_features=128, bias=False)
+              (act_fn): SiLU()
+            )
+          )
+        )
+        (input_layernorm): MiniMaxM1RMSNorm()
+        (post_attention_layernorm): MiniMaxM1RMSNorm()
+      )
+    )
+    (norm): MiniMaxM1RMSNorm()
+  )
+  (lm_head): Linear(in_features=64, out_features=200064, bias=False)
+)
+```
+
 ### Codes to create this repo:
 
 ```python
@@ -81,7 +138,9 @@ automap = config_json['auto_map']
 torch.set_default_dtype(torch.bfloat16)
 model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)
 torch.set_default_dtype(torch.float32)
-
+# according to the source model, the gate is in FP32
+for i in range(config.num_hidden_layers):
+    model.model.layers[i].block_sparse_moe.gate.float()
 if file_exists(filename="generation_config.json", repo_id=source_model_id, repo_type='model'):
     model.generation_config = GenerationConfig.from_pretrained(
         source_model_id, trust_remote_code=True,
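
The lines added in the second hunk cast each MoE gate back to FP32 after the rest of the model is built in bfloat16. A minimal sanity check, assuming the `config` and `model` objects from the README snippet are in scope:

```python
import torch

# After the cast loop, every MoE gate should hold FP32 weights,
# while the rest of the model stays in the default bfloat16.
for i in range(config.num_hidden_layers):
    gate = model.model.layers[i].block_sparse_moe.gate
    assert gate.weight.dtype == torch.float32
assert model.lm_head.weight.dtype == torch.bfloat16
```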
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a6581a14a0cd32f179fec72b1066e97d80ea5ee170199d4ea5de11725807b1fa
-size 26470640
+oid sha256:9989590c725ca73f9d96e3da308207df3bab221cd046c452506bc50f9ad59770
+size 26472672
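
To confirm that a locally downloaded model.safetensors matches the new LFS pointer, the file can be hashed and compared against the oid and size above; a minimal sketch (the local path is a placeholder):

```python
import hashlib
import os

path = "model.safetensors"  # placeholder: local copy of the updated file

sha256 = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha256.update(chunk)

# Expected values come from the new LFS pointer in this commit.
assert sha256.hexdigest() == "9989590c725ca73f9d96e3da308207df3bab221cd046c452506bc50f9ad59770"
assert os.path.getsize(path) == 26472672
```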