guanwenyu1995 committed
Commit e199301 · verified · 1 Parent(s): ac2f1ac

Upload configuration_minicpm.py

Files changed (1):
  1. configuration_minicpm.py (+19 -18)
configuration_minicpm.py CHANGED
@@ -1,10 +1,5 @@
 # coding=utf-8
-# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
-#
-# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
-# and OPT implementations in this library. It has been modified from its
-# original forms to accommodate minor architectural differences compared
-# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
+# Copyright 2025 The OpenBMB Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -22,7 +17,6 @@
 from transformers.configuration_utils import PretrainedConfig
 from transformers.utils import logging
 
-
 logger = logging.get_logger(__name__)
 
 MINICPM_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
@@ -111,8 +105,8 @@ class MiniCPMConfig(PretrainedConfig):
     >>> configuration = model.config
     ```"""
 
-    model_type = "minicpm"
-    keys_to_ignore_at_inference = ["past_key_values"]
+    model_type = 'minicpm'
+    keys_to_ignore_at_inference = ['past_key_values']
 
     def __init__(
         self,
@@ -122,7 +116,7 @@ class MiniCPMConfig(PretrainedConfig):
         num_hidden_layers=32,
         num_attention_heads=32,
         num_key_value_heads=None,
-        hidden_act="silu",
+        hidden_act='silu',
         max_position_embeddings=2048,
         initializer_range=0.02,
         rms_norm_eps=1e-6,
@@ -139,8 +133,10 @@ class MiniCPMConfig(PretrainedConfig):
         scale_emb=1,
         dim_model_base=1,
         scale_depth=1,
-        **kwargs,
-    ):
+        mup_denominator=32,
+        sparse_config=None,
+        **kwargs):
+
         self.vocab_size = vocab_size
         self.max_position_embeddings = max_position_embeddings
         self.hidden_size = hidden_size
@@ -166,6 +162,11 @@ class MiniCPMConfig(PretrainedConfig):
         self.scale_emb = scale_emb
         self.dim_model_base = dim_model_base
         self.scale_depth = scale_depth
+        # only used for Eagle Head
+        self.mup_denominator = mup_denominator
+
+        # sparse config
+        self.sparse_config = sparse_config
 
         super().__init__(
             pad_token_id=pad_token_id,
@@ -176,7 +177,7 @@ class MiniCPMConfig(PretrainedConfig):
         )
         try:
             import flash_attn
-            self._attn_implementation = "flash_attention_2"
+            self._attn_implementation = 'flash_attention_2'
         except:
             pass
 
@@ -189,12 +190,12 @@ class MiniCPMConfig(PretrainedConfig):
 
         if not isinstance(self.rope_scaling, dict) or len(self.rope_scaling) != 2:
             raise ValueError(
-                "`rope_scaling` must be a dictionary with with two fields, `type` and `factor`, "
-                f"got {self.rope_scaling}"
+                '`rope_scaling` must be a dictionary with with two fields, `type` and `factor`, '
+                f'got {self.rope_scaling}'
             )
-        rope_scaling_type = self.rope_scaling.get("type", None)
-        rope_scaling_factor = self.rope_scaling.get("factor", None)
-        if rope_scaling_type is None or rope_scaling_type not in ["linear", "dynamic"]:
+        rope_scaling_type = self.rope_scaling.get('type', None)
+        rope_scaling_factor = self.rope_scaling.get('factor', None)
+        if rope_scaling_type is None or rope_scaling_type not in ['linear', 'dynamic']:
             raise ValueError(
                 f"`rope_scaling`'s type field must be one of ['linear', 'dynamic'], got {rope_scaling_type}"
             )
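
For reference, the two new constructor arguments are stored directly as attributes on the config. Below is a minimal usage sketch, not part of the commit, assuming configuration_minicpm.py from this upload is importable locally; the diff gives no schema for sparse_config, so it is left as None, and rope_scaling is shown only as the two-field dict that the validation above expects.

# Minimal sketch (not from the commit): build the updated config and read back
# the attributes added in this change.
from configuration_minicpm import MiniCPMConfig  # assumes the uploaded file is on the path

config = MiniCPMConfig(
    mup_denominator=32,                               # new argument; per the inline comment, only used for the Eagle Head
    sparse_config=None,                               # new argument; no schema is defined in this diff, so left unset
    rope_scaling={'type': 'dynamic', 'factor': 2.0},  # must contain exactly the `type` and `factor` fields
)

print(config.mup_denominator)  # -> 32
print(config.sparse_config)    # -> None
print(config.rope_scaling)     # -> {'type': 'dynamic', 'factor': 2.0}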