Upload folder using huggingface_hub

#4
by sharpenb - opened
Files changed (4) hide show
  1. config.json +23 -2
  2. configuration_baichuan.py +66 -0
  3. qmodel.pt +2 -2
  4. smash_config.json +1 -1
config.json CHANGED
@@ -1,10 +1,11 @@
1
  {
2
- "_name_or_path": "baichuan-inc/Baichuan-7B",
 
3
  "architectures": [
4
  "BaiChuanForCausalLM"
5
  ],
6
  "auto_map": {
7
- "AutoConfig": "baichuan-inc/Baichuan-7B--configuration_baichuan.BaiChuanConfig",
8
  "AutoModelForCausalLM": "baichuan-inc/Baichuan-7B--modeling_baichuan.BaiChuanForCausalLM"
9
  },
10
  "bos_token_id": 1,
@@ -18,6 +19,26 @@
18
  "num_attention_heads": 32,
19
  "num_hidden_layers": 32,
20
  "pad_token_id": 0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  "rms_norm_eps": 1e-06,
22
  "tie_word_embeddings": false,
23
  "torch_dtype": "float32",
 
1
  {
2
+ "_attn_implementation_autoset": true,
3
+ "_name_or_path": "/tmp/models/tmpphwma9oi/tmpyo9n14wg",
4
  "architectures": [
5
  "BaiChuanForCausalLM"
6
  ],
7
  "auto_map": {
8
+ "AutoConfig": "configuration_baichuan.BaiChuanConfig",
9
  "AutoModelForCausalLM": "baichuan-inc/Baichuan-7B--modeling_baichuan.BaiChuanForCausalLM"
10
  },
11
  "bos_token_id": 1,
 
19
  "num_attention_heads": 32,
20
  "num_hidden_layers": 32,
21
  "pad_token_id": 0,
22
+ "quantization_config": {
23
+ "quant_config": {
24
+ "offload_meta": false,
25
+ "scale_quant_params": null,
26
+ "weight_quant_params": {
27
+ "axis": 1,
28
+ "channel_wise": true,
29
+ "group_size": 64,
30
+ "nbits": 4,
31
+ "optimize": true,
32
+ "round_zero": true,
33
+ "view_as_float": false
34
+ },
35
+ "zero_quant_params": null
36
+ },
37
+ "quant_method": "hqq",
38
+ "skip_modules": [
39
+ "lm_head"
40
+ ]
41
+ },
42
  "rms_norm_eps": 1e-06,
43
  "tie_word_embeddings": false,
44
  "torch_dtype": "float32",
configuration_baichuan.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
3
+ #
4
+ # This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
5
+ # and OPT implementations in this library. It has been modified from its
6
+ # original forms to accommodate minor architectural differences compared
7
+ # to GPT-NeoX and OPT used by the Meta AI team that trained the model.
8
+ #
9
+ # Licensed under the Apache License, Version 2.0 (the "License");
10
+ # you may not use this file except in compliance with the License.
11
+ # You may obtain a copy of the License at
12
+ #
13
+ # http://www.apache.org/licenses/LICENSE-2.0
14
+ #
15
+ # Unless required by applicable law or agreed to in writing, software
16
+ # distributed under the License is distributed on an "AS IS" BASIS,
17
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18
+ # See the License for the specific language governing permissions and
19
+ # limitations under the License.
20
+
21
+ from transformers.configuration_utils import PretrainedConfig
22
+ from transformers.utils import logging
23
+
24
+
25
+ logger = logging.get_logger(__name__)
26
+
27
+
28
class BaiChuanConfig(PretrainedConfig):
    """Configuration for BaiChuan causal-LM models.

    Holds the architectural hyperparameters consumed by
    ``BaiChuanForCausalLM``; the defaults correspond to the 7B variant
    (4096 hidden size, 32 layers, 32 heads, 64k vocabulary).
    """

    model_type = "baichuan"
    # Entries dropped from model outputs at inference time.
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        vocab_size=64000,
        hidden_size=4096,
        intermediate_size=11008,
        num_hidden_layers=32,
        num_attention_heads=32,
        hidden_act="silu",
        max_position_embeddings=4096,
        initializer_range=0.02,
        rms_norm_eps=1e-6,
        use_cache=True,
        pad_token_id=0,
        bos_token_id=1,
        eos_token_id=2,
        tie_word_embeddings=False,
        **kwargs,
    ):
        # Core transformer dimensions.
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads

        # Vocabulary and maximum sequence length.
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings

        # Activation, weight initialization and RMSNorm epsilon.
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        self.rms_norm_eps = rms_norm_eps

        # Whether past key/values are cached during generation.
        self.use_cache = use_cache

        # Special-token ids and embedding tying are stored by the base class.
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )
qmodel.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78e8ca1fbb56e0bdf9c81f485b634a6d1a0a373f9e4afda27e4b5aba91f65b5d
3
- size 4692057261
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5907dae0b9833f9f51af48c2c97fd6575d50e004826beb85544e4d4e3ecad6c
3
+ size 6145913165
smash_config.json CHANGED
@@ -11,7 +11,7 @@
11
  "quant_hqq_weight_bits": 4,
12
  "max_batch_size": 1,
13
  "device": "cuda",
14
- "cache_dir": "/tmp/models/tmp2p__e1si",
15
  "task": "",
16
  "save_load_fn": "hqq",
17
  "save_load_fn_args": {},
 
11
  "quant_hqq_weight_bits": 4,
12
  "max_batch_size": 1,
13
  "device": "cuda",
14
+ "cache_dir": "/tmp/models/tmpphwma9oi",
15
  "task": "",
16
  "save_load_fn": "hqq",
17
  "save_load_fn_args": {},