TroyDoesAI committed
Commit 94d7790 · verified · 1 parent: ae01b2c

Uses 6.7 GB of VRAM during inference. It is still not hard to instruct it into toxic behavior deliberately, but that behavior will no longer come out by accident.


Tested as good up to 4K context, since it is currently being trained at 4K; the context window will be extended once the model's validation loss drops to an acceptable value.
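For reference, the five bfloat16 shards below total roughly 22 GB, so the 6.7 GB figure most plausibly refers to a quantized load. A minimal sketch of one way to reproduce such a measurement, assuming a hypothetical repo id of TroyDoesAI/BlackSheep-MoE and 4-bit quantization via bitsandbytes (neither is stated in this commit):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

repo_id = "TroyDoesAI/BlackSheep-MoE"  # assumed repo id, not confirmed by this commit

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),  # assumption about how 6.7 GB was reached
    device_map="auto",
)

# Measure peak VRAM across a short generation.
torch.cuda.reset_peak_memory_stats()
inputs = tokenizer("Write a haiku about sheep.", return_tensors="pt").to(model.device)
with torch.no_grad():
    model.generate(**inputs, max_new_tokens=64)
print(f"Peak VRAM: {torch.cuda.max_memory_allocated() / 2**30:.1f} GiB")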

config.json CHANGED
@@ -1,35 +1,35 @@
-{
-  "_name_or_path": ".\\BlackSheep",
-  "architectures": [
-    "MixtralForCausalLM"
-  ],
-  "attention_bias": false,
-  "attention_dropout": 0.0,
-  "bos_token_id": 1,
-  "eos_token_id": 32000,
-  "hidden_act": "silu",
-  "hidden_size": 3072,
-  "initializer_range": 0.02,
-  "intermediate_size": 8192,
-  "max_position_embeddings": 8192,
-  "mlp_bias": false,
-  "model_type": "mixtral",
-  "num_attention_heads": 32,
-  "num_experts_per_tok": 4,
-  "num_hidden_layers": 32,
-  "num_key_value_heads": 32,
-  "num_local_experts": 4,
-  "output_router_logits": false,
-  "pretraining_tp": 1,
-  "rms_norm_eps": 1e-05,
-  "rope_scaling": null,
-  "rope_theta": 10000.0,
-  "router_aux_loss_coef": 0.001,
-  "router_jitter_noise": 0.0,
-  "sliding_window": null,
-  "tie_word_embeddings": false,
-  "torch_dtype": "float16",
-  "transformers_version": "4.40.2",
-  "use_cache": true,
-  "vocab_size": 32064
-}
+{
+  "_name_or_path": ".\\BlackSheep-MoE",
+  "architectures": [
+    "MixtralForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "eos_token_id": 32000,
+  "hidden_act": "silu",
+  "hidden_size": 3072,
+  "initializer_range": 0.02,
+  "intermediate_size": 8192,
+  "max_position_embeddings": 16384,
+  "mlp_bias": false,
+  "model_type": "mixtral",
+  "num_attention_heads": 32,
+  "num_experts_per_tok": 2,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 32,
+  "num_local_experts": 4,
+  "output_router_logits": false,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "router_aux_loss_coef": 0.001,
+  "router_jitter_noise": 0.0,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.44.2",
+  "use_cache": true,
+  "vocab_size": 32064
+}
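The substantive config changes are three: num_experts_per_tok drops from 4 to 2 (of 4 local experts, so per-token MoE compute roughly halves), max_position_embeddings doubles from 8192 to 16384, and torch_dtype moves from float16 to bfloat16. A quick sanity check after download, again assuming the hypothetical repo id from above:

from transformers import AutoConfig

cfg = AutoConfig.from_pretrained("TroyDoesAI/BlackSheep-MoE")  # assumed repo id
assert cfg.num_local_experts == 4            # total experts per MoE layer
assert cfg.num_experts_per_tok == 2          # active experts per token (was 4)
assert cfg.max_position_embeddings == 16384  # config ceiling; only 4K is tested so far
print(cfg.torch_dtype)                       # bfloat16 (was float16)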
generation_config.json CHANGED
@@ -2,5 +2,5 @@
   "_from_model_config": true,
   "bos_token_id": 1,
   "eos_token_id": 32000,
-  "transformers_version": "4.40.2"
+  "transformers_version": "4.44.2"
 }
model-00001-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e11d3bf4da5ea34df18794ccb10237709f2c9077bdf78d1eebfbae66a73f143c
-size 4991365576
+oid sha256:1741f46daa03ca1ffcd1ba8eb64dfb6ca8f8af30f463fdec5b0e4b9a6a9efe40
+size 4991365712
model-00002-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:583b23c13c9940449241e4af1e4e3af1f697346eacfe1edb1a78f1a222012d06
-size 4995716136
+oid sha256:e445ac7171a8c08b455d0b312d03c3a2051a560aee995c8b415846a684074fe1
+size 4995716272
model-00003-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:21268d9ffc47a6c37a703e4f781b51f8b13bf16fd9c06420fa54e2256422b640
-size 4957942536
+oid sha256:8a13c12a1303974c3f07cfe8657749a4bfcdb43c28e244f69d37c7e1c73d14ff
+size 4957942672
model-00004-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8ff135b7255bbdeefcfe775cfa2e7490f6df5b807b71bedda666d13a5bfdbd1b
-size 4995704008
+oid sha256:b774a2ca21f24c8571c88292a6f46bf477bac000919874e3842942c604dfa29e
+size 4995704152
model-00005-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eadcd8f3c92b3712d7ea9e8bfaf218443011c01ec9327f5340a766f3b0ce556b
-size 2197808032
+oid sha256:9dae58c3df48a77e7598340fa4078ec9a63fdb4256949fcbd655d0c555cf6a86
+size 2197808096
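Each *.safetensors entry above is a Git LFS pointer rather than the weights themselves: the "oid sha256" line is the hash of the actual shard, so downloaded files can be verified against it. A sketch assuming the shards sit in the current directory, with the post-commit hash of the first shard copied from the diff above (fill in the other four the same way):

import hashlib

expected = {
    "model-00001-of-00005.safetensors":
        "1741f46daa03ca1ffcd1ba8eb64dfb6ca8f8af30f463fdec5b0e4b9a6a9efe40",
    # ...remaining four shards, hashes from the "+oid sha256:" lines above...
}

for name, want in expected.items():
    digest = hashlib.sha256()
    with open(name, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            digest.update(chunk)
    assert digest.hexdigest() == want, f"{name}: hash mismatch"
    print(f"{name}: OK")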
tokenizer_config.json CHANGED
@@ -1,6 +1,7 @@
 {
   "add_bos_token": true,
   "add_eos_token": false,
+  "add_prefix_space": null,
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",