BigDong committed
Commit 76dd695 · 1 Parent(s): f70e79c

update models
config.json CHANGED
@@ -1,42 +1,193 @@
 {
-  "_name_or_path": "openbmb/CPM-2B",
-  "architectures": [
-    "MiniCPMForCausalLM"
-  ],
-  "auto_map": {
-    "AutoConfig": "configuration_minicpm.MiniCPMConfig",
-    "AutoModel": "modeling_minicpm.MiniCPMModel",
-    "AutoModelForCausalLM": "modeling_minicpm.MiniCPMForCausalLM",
-    "AutoModelForSeq2SeqLM": "modeling_minicpm.MiniCPMForCausalLM",
-    "AutoModelForSequenceClassification": "modeling_minicpm.MiniCPMForSequenceClassification"
-  },
-  "bos_token_id": 1,
-  "eos_token_id": [2,73440],
-  "pad_token_id": 2,
-  "hidden_act": "silu",
-  "hidden_size": 4096,
-  "initializer_range": 0.1,
-  "intermediate_size": 16384,
-  "max_position_embeddings": 32768,
-  "model_type": "minicpm",
-  "num_attention_heads": 32,
-  "num_hidden_layers": 1,
-  "num_key_value_heads": 2,
-  "rms_norm_eps": 1e-06,
-  "rope_scaling": {
-    "rope_type": "longrope",
-    "long_factor": [0.9977997200264581, 1.014658295992452, 1.0349680404997148, 1.059429246056193, 1.0888815016813513, 1.1243301355211495, 1.166977103606075, 1.2182568066927284, 1.2798772354275727, 1.3538666751582975, 1.4426259039919596, 1.5489853358570191, 1.6762658237220625, 1.8283407612492941, 2.0096956085876183, 2.225478927469756, 2.481536379650452, 2.784415934557119, 3.1413289096347365, 3.560047844772632, 4.048719380066383, 4.615569542115128, 5.2684819496549835, 6.014438591970396, 6.858830049237097, 7.804668263503327, 8.851768731513417, 9.99600492938444, 11.228766118181639, 12.536757560834843, 13.902257701387796, 15.303885189125953, 16.717837610115794, 18.119465097853947, 19.484965238406907, 20.792956681060105, 22.02571786985731, 23.16995406772833, 24.217054535738416, 25.16289275000465, 26.007284207271347, 26.753240849586767, 27.40615325712662, 27.973003419175363, 28.461674954469114, 28.880393889607006, 29.237306864684626, 29.540186419591297, 29.79624387177199, 30.01202719065413, 30.193382037992453, 30.34545697551969, 30.47273746338473, 30.579096895249787, 30.66785612408345, 30.741845563814174, 30.80346599254902, 30.85474569563567, 30.897392663720595, 30.932841297560394, 30.962293553185553, 30.986754758742034, 31.007064503249293, 31.02392307921529],
-    "short_factor": [0.9977997200264581, 1.014658295992452, 1.0349680404997148, 1.059429246056193, 1.0888815016813513, 1.1243301355211495, 1.166977103606075, 1.2182568066927284, 1.2798772354275727, 1.3538666751582975, 1.4426259039919596, 1.5489853358570191, 1.6762658237220625, 1.8283407612492941, 2.0096956085876183, 2.225478927469756, 2.481536379650452, 2.784415934557119, 3.1413289096347365, 3.560047844772632, 4.048719380066383, 4.615569542115128, 5.2684819496549835, 6.014438591970396, 6.858830049237097, 7.804668263503327, 8.851768731513417, 9.99600492938444, 11.228766118181639, 12.536757560834843, 13.902257701387796, 15.303885189125953, 16.717837610115794, 18.119465097853947, 19.484965238406907, 20.792956681060105, 22.02571786985731, 23.16995406772833, 24.217054535738416, 25.16289275000465, 26.007284207271347, 26.753240849586767, 27.40615325712662, 27.973003419175363, 28.461674954469114, 28.880393889607006, 29.237306864684626, 29.540186419591297, 29.79624387177199, 30.01202719065413, 30.193382037992453, 30.34545697551969, 30.47273746338473, 30.579096895249787, 30.66785612408345, 30.741845563814174, 30.80346599254902, 30.85474569563567, 30.897392663720595, 30.932841297560394, 30.962293553185553, 30.986754758742034, 31.007064503249293, 31.02392307921529],
-    "original_max_position_embeddings": 32768
-  },
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.36.0",
-  "use_cache": true,
-  "vocab_size": 73448,
-  "scale_emb": 12,
-  "dim_model_base": 256,
-  "scale_depth": 1.4,
-  "tie_word_embeddings": false,
-  "bias": false,
-  "head_dim": 128
+  "_name_or_path": "/DATA/disk1/guanwenyu/models/minicpm4/0527-sft-8000/",
+  "architectures": [
+    "MiniCPMForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "auto_map": {
+    "AutoConfig": "configuration_minicpm.MiniCPMConfig",
+    "AutoModel": "modeling_minicpm.MiniCPMModel",
+    "AutoModelForCausalLM": "modeling_minicpm.MiniCPMLongRopeForCausalLM",
+    "AutoModelForSeq2SeqLM": "modeling_minicpm.MiniCPMLongRopeForCausalLM",
+    "AutoModelForSequenceClassification": "modeling_minicpm.MiniCPMForSequenceClassification"
+  },
+  "bias": false,
+  "bos_token_id": 1,
+  "dim_model_base": 256,
+  "eos_token_id": [
+    2,
+    73440
+  ],
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.1,
+  "intermediate_size": 16384,
+  "max_position_embeddings": 32768,
+  "model_type": "minicpm",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 1,
+  "num_key_value_heads": 2,
+  "pad_token_id": 2,
+  "pretraining_tp": 1,
+  "quantization_config": {
+    "bits": 4,
+    "checkpoint_format": "gptq",
+    "damp_percent": 0.01,
+    "desc_act": false,
+    "group_size": 128,
+    "lm_head": false,
+    "model_file_base_name": null,
+    "model_name_or_path": null,
+    "quant_method": "gptq",
+    "static_groups": false,
+    "sym": true,
+    "true_sequential": true
+  },
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": {
+    "long_factor": [
+      0.9977997200264581,
+      1.014658295992452,
+      1.0349680404997148,
+      1.059429246056193,
+      1.0888815016813513,
+      1.1243301355211495,
+      1.166977103606075,
+      1.2182568066927284,
+      1.2798772354275727,
+      1.3538666751582975,
+      1.4426259039919596,
+      1.5489853358570191,
+      1.6762658237220625,
+      1.8283407612492941,
+      2.0096956085876183,
+      2.225478927469756,
+      2.481536379650452,
+      2.784415934557119,
+      3.1413289096347365,
+      3.560047844772632,
+      4.048719380066383,
+      4.615569542115128,
+      5.2684819496549835,
+      6.014438591970396,
+      6.858830049237097,
+      7.804668263503327,
+      8.851768731513417,
+      9.99600492938444,
+      11.228766118181639,
+      12.536757560834843,
+      13.902257701387796,
+      15.303885189125953,
+      16.717837610115794,
+      18.119465097853947,
+      19.484965238406907,
+      20.792956681060105,
+      22.02571786985731,
+      23.16995406772833,
+      24.217054535738416,
+      25.16289275000465,
+      26.007284207271347,
+      26.753240849586767,
+      27.40615325712662,
+      27.973003419175363,
+      28.461674954469114,
+      28.880393889607006,
+      29.237306864684626,
+      29.540186419591297,
+      29.79624387177199,
+      30.01202719065413,
+      30.193382037992453,
+      30.34545697551969,
+      30.47273746338473,
+      30.579096895249787,
+      30.66785612408345,
+      30.741845563814174,
+      30.80346599254902,
+      30.85474569563567,
+      30.897392663720595,
+      30.932841297560394,
+      30.962293553185553,
+      30.986754758742034,
+      31.007064503249293,
+      31.02392307921529
+    ],
+    "original_max_position_embeddings": 32768,
+    "short_factor": [
+      0.9977997200264581,
+      1.014658295992452,
+      1.0349680404997148,
+      1.059429246056193,
+      1.0888815016813513,
+      1.1243301355211495,
+      1.166977103606075,
+      1.2182568066927284,
+      1.2798772354275727,
+      1.3538666751582975,
+      1.4426259039919596,
+      1.5489853358570191,
+      1.6762658237220625,
+      1.8283407612492941,
+      2.0096956085876183,
+      2.225478927469756,
+      2.481536379650452,
+      2.784415934557119,
+      3.1413289096347365,
+      3.560047844772632,
+      4.048719380066383,
+      4.615569542115128,
+      5.2684819496549835,
+      6.014438591970396,
+      6.858830049237097,
+      7.804668263503327,
+      8.851768731513417,
+      9.99600492938444,
+      11.228766118181639,
+      12.536757560834843,
+      13.902257701387796,
+      15.303885189125953,
+      16.717837610115794,
+      18.119465097853947,
+      19.484965238406907,
+      20.792956681060105,
+      22.02571786985731,
+      23.16995406772833,
+      24.217054535738416,
+      25.16289275000465,
+      26.007284207271347,
+      26.753240849586767,
+      27.40615325712662,
+      27.973003419175363,
+      28.461674954469114,
+      28.880393889607006,
+      29.237306864684626,
+      29.540186419591297,
+      29.79624387177199,
+      30.01202719065413,
+      30.193382037992453,
+      30.34545697551969,
+      30.47273746338473,
+      30.579096895249787,
+      30.66785612408345,
+      30.741845563814174,
+      30.80346599254902,
+      30.85474569563567,
+      30.897392663720595,
+      30.932841297560394,
+      30.962293553185553,
+      30.986754758742034,
+      31.007064503249293,
+      31.02392307921529
+    ],
+    "type": "longrope"
+  },
+  "rope_theta": 10000.0,
+  "scale_depth": 1.4,
+  "scale_emb": 12,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.40.2",
+  "use_cache": true,
+  "vocab_size": 73448
 }
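The updated config routes `AutoModelForCausalLM` through the custom `modeling_minicpm.MiniCPMLongRopeForCausalLM` class via `auto_map` and embeds a GPTQ `quantization_config`, so loading it requires `trust_remote_code=True` plus a GPTQ-capable backend. A minimal loading sketch; the checkpoint path is a hypothetical placeholder:

```python
# Loading sketch only: "path/to/this-checkpoint" is a hypothetical placeholder,
# and a GPTQ backend (e.g. auto-gptq or gptqmodel) must be installed for the
# embedded quantization_config to take effect.
from transformers import AutoModelForCausalLM, AutoTokenizer

ckpt = "path/to/this-checkpoint"  # hypothetical placeholder

tokenizer = AutoTokenizer.from_pretrained(ckpt, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    ckpt,
    trust_remote_code=True,  # resolves modeling_minicpm.MiniCPMLongRopeForCausalLM
    torch_dtype="auto",      # config now declares float16 (previously bfloat16)
    device_map="auto",
)

inputs = tokenizer("Hello", return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```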
freq_16384.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8988ab42c340bb8adbf959e18fca86e883d5575524868cef95f0495af59e4e5
+size 50477
freq_32768.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8375dc2d2b32c63f05300409e07b7c2db5a0c148625921efd375569a5d30d3ac
+size 100333
freq_4096.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:337de82bbf09cc2225a315dcaf10381c039a2ee16b577015d2b5fab340c62e48
+size 13549
freq_8192.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7219ec85b36022c41788aa274a0a81ec86d2a7869fd6e8dbe851ec79ab8e35b
+size 25837
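The four new `freq_*.pt` files are Git LFS blobs whose names track the context lengths 4096/8192/16384/32768; given the `longrope` scaling in config.json, they look like precomputed rotary-frequency tables for the custom modeling code, though the commit itself does not say so. A quick inspection sketch under that assumption:

```python
# Inspection sketch. That these files hold precomputed RoPE frequency tables
# for the longrope scaling is an inference from the file names and config,
# not something this commit confirms.
import torch

obj = torch.load("freq_32768.pt", map_location="cpu")
print(type(obj))
if hasattr(obj, "shape"):
    print(obj.shape, obj.dtype)
```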
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8fabad72213b7aece79e08b5252f6fe15e52e92368bd84cf700e87b36e5c719e
-size 791020952
+oid sha256:66ee33469942f4a1d8548779ef7652ea2372b36f2f0ee6023aaff4741cd70eb3
+size 741213672
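Like the `freq_*.pt` files, `model.safetensors` is stored as a Git LFS pointer; after `git lfs pull`, the local blob can be checked against the oid and size recorded in this commit:

```python
# Verify the pulled LFS blob against the pointer recorded in this commit.
import hashlib
import os

EXPECTED_OID = "66ee33469942f4a1d8548779ef7652ea2372b36f2f0ee6023aaff4741cd70eb3"
EXPECTED_SIZE = 741213672

h = hashlib.sha256()
with open("model.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert os.path.getsize("model.safetensors") == EXPECTED_SIZE
assert h.hexdigest() == EXPECTED_OID
print("model.safetensors matches its LFS pointer")
```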
quantize_config.json ADDED
@@ -0,0 +1,14 @@
+{
+  "bits": 4,
+  "group_size": 128,
+  "damp_percent": 0.01,
+  "desc_act": false,
+  "static_groups": false,
+  "sym": true,
+  "true_sequential": true,
+  "lm_head": false,
+  "model_name_or_path": null,
+  "model_file_base_name": null,
+  "quant_method": "gptq",
+  "checkpoint_format": "gptq"
+}
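The standalone quantize_config.json duplicates the `quantization_config` block embedded in config.json, in AutoGPTQ's on-disk format. The same settings expressed through AutoGPTQ's config object (a sketch, assuming the `auto-gptq` package):

```python
# Sketch: quantize_config.json expressed via AutoGPTQ's BaseQuantizeConfig.
# Field names mirror the JSON keys; assumes the auto-gptq package is installed.
from auto_gptq import BaseQuantizeConfig

quantize_config = BaseQuantizeConfig(
    bits=4,                # 4-bit weights
    group_size=128,        # one scale/zero-point pair per 128 weights
    damp_percent=0.01,     # Hessian damping during GPTQ calibration
    desc_act=False,        # no activation-order reordering
    static_groups=False,
    sym=True,              # symmetric quantization grid
    true_sequential=True,  # quantize modules sequentially, propagating error
)
```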
special_tokens_map.json CHANGED
@@ -1,13 +1,61 @@
 {
   "additional_special_tokens": [
-    "<|im_end|>",
-    "<|im_start|>",
-    "<|tool_call|>",
-    "<|execute_start|>",
-    "<|execute_end|>",
-    "<|fim_prefix|>",
-    "<|fim_middle|>",
-    "<|fim_suffix|>"
+    {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|tool_call|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|execute_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|execute_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    }
   ],
   "bos_token": {
     "content": "<s>",
@@ -17,7 +65,7 @@
     "single_word": false
   },
   "eos_token": {
-    "content": "<|im_end|>",
+    "content": "</s>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,7 +1,6 @@
 {
   "add_bos_token": true,
   "add_eos_token": false,
-  "add_prefix_space": null,
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",
@@ -103,7 +102,6 @@
     "<|fim_suffix|>"
   ],
   "bos_token": "<s>",
-  "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|im_end|>",
   "legacy": true,
@@ -113,5 +111,6 @@
   "spaces_between_special_tokens": false,
   "tokenizer_class": "LlamaTokenizer",
   "unk_token": "<unk>",
-  "use_default_system_prompt": false
+  "use_default_system_prompt": false,
+  "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
 }
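The chat_template itself is unchanged by this commit, only moved to the end of the file; it is a ChatML-style loop over messages. Applying it (checkpoint path is a hypothetical placeholder):

```python
# Render the ChatML-style chat_template from tokenizer_config.json.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("path/to/this-checkpoint", trust_remote_code=True)
messages = [{"role": "user", "content": "Hi"}]
print(tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True))
# Expected output:
# <|im_start|>user
# Hi<|im_end|>
# <|im_start|>assistant
```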