danielhanchen committed on
Commit
32a6e13
·
verified ·
1 Parent(s): baf4c6b

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -47,3 +47,5 @@ DeepSeek-R1-Distill-Llama-8B-UD-Q4_K_XL.gguf filter=lfs diff=lfs merge=lfs -text
47
  DeepSeek-R1-Distill-Llama-8B-UD-IQ1_M.gguf filter=lfs diff=lfs merge=lfs -text
48
  DeepSeek-R1-Distill-Llama-8B-UD-IQ2_XXS.gguf filter=lfs diff=lfs merge=lfs -text
49
  DeepSeek-R1-Distill-Llama-8B-UD-IQ3_XXS.gguf filter=lfs diff=lfs merge=lfs -text
 
 
 
47
  DeepSeek-R1-Distill-Llama-8B-UD-IQ1_M.gguf filter=lfs diff=lfs merge=lfs -text
48
  DeepSeek-R1-Distill-Llama-8B-UD-IQ2_XXS.gguf filter=lfs diff=lfs merge=lfs -text
49
  DeepSeek-R1-Distill-Llama-8B-UD-IQ3_XXS.gguf filter=lfs diff=lfs merge=lfs -text
50
+ DeepSeek-R1-Distill-Llama-8B-UD-Q2_K_XL.gguf filter=lfs diff=lfs merge=lfs -text
51
+ DeepSeek-R1-Distill-Llama-8B-UD-Q3_K_XL.gguf filter=lfs diff=lfs merge=lfs -text
DeepSeek-R1-Distill-Llama-8B-UD-IQ1_M.gguf CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3eee7ed8439d0ddf600f30df1e27c09ac0c190e1018b22226e5ff9733b210170
3
- size 2882747456
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb7cbaacbdf50833b1703b3fae1781d144bfd2bffea66f9a27ef01167797cfac
3
+ size 2292202944
DeepSeek-R1-Distill-Llama-8B-UD-IQ1_S.gguf CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6889818bdf723f79fb858d5676d9d9848cd0b296eb697cce0b2b7c00260c6131
3
- size 2795584576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4195f0b2c4b5af77a878a8cd04d57a45ac3cc9cdc48724c2b536cd4c0ab71815
3
+ size 2164669888
DeepSeek-R1-Distill-Llama-8B-UD-IQ2_M.gguf CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:731a33305ac9a0f722aca15066ad782474dc25abfa6184b1e6ef96a936d06a77
3
- size 3273997376
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d2070aa908b3913f9d94f5a903defa138214c549045fcc10cca34997df88f5d
3
+ size 3003268544
DeepSeek-R1-Distill-Llama-8B-UD-IQ3_XXS.gguf CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5b2ddf7eb35049e6e88673cfbbb039d71e56042ffcca17848a26c9e3218224b
3
- size 3689692224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e73763e4972172dfa1c9f41e38358f80cdfebc947bbcee25122f2cda0f09bf5c
3
+ size 3321773504
DeepSeek-R1-Distill-Llama-8B-UD-Q2_K_XL.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f09aad0d970b8326920c784baa9be920aeab4f74a450fee8c7628b634d2544e8
3
+ size 3388767680
DeepSeek-R1-Distill-Llama-8B-UD-Q3_K_XL.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:466c7688c4b24021ecd492d01e2dfaf773d09d1a2d5b62c0d74d39c67ada0022
3
+ size 4199611840
DeepSeek-R1-Distill-Llama-8B-UD-Q4_K_XL.gguf CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:898066848d16cbc29d59b9ea3ae3ee274d5eaf5a9e8b29d6cf6367d2b569920d
3
- size 5011807296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c068e09539243f226c9c68c9bbe6f0fe23ee1aa6b647af465769714f046ff9f1
3
+ size 4994203072
config.json CHANGED
@@ -6,7 +6,6 @@
6
  "attention_dropout": 0.0,
7
  "bos_token_id": 128000,
8
  "eos_token_id": 128001,
9
- "head_dim": 128,
10
  "hidden_act": "silu",
11
  "hidden_size": 4096,
12
  "initializer_range": 0.02,
@@ -17,21 +16,19 @@
17
  "num_attention_heads": 32,
18
  "num_hidden_layers": 32,
19
  "num_key_value_heads": 8,
20
- "pad_token_id": 128004,
21
  "pretraining_tp": 1,
22
  "rms_norm_eps": 1e-05,
23
  "rope_scaling": {
24
  "factor": 8.0,
25
- "high_freq_factor": 4.0,
26
  "low_freq_factor": 1.0,
 
27
  "original_max_position_embeddings": 8192,
28
  "rope_type": "llama3"
29
  },
30
  "rope_theta": 500000.0,
31
  "tie_word_embeddings": false,
32
  "torch_dtype": "bfloat16",
33
- "transformers_version": "4.51.0",
34
- "unsloth_fixed": true,
35
  "use_cache": true,
36
  "vocab_size": 128256
37
  }
 
6
  "attention_dropout": 0.0,
7
  "bos_token_id": 128000,
8
  "eos_token_id": 128001,
 
9
  "hidden_act": "silu",
10
  "hidden_size": 4096,
11
  "initializer_range": 0.02,
 
16
  "num_attention_heads": 32,
17
  "num_hidden_layers": 32,
18
  "num_key_value_heads": 8,
 
19
  "pretraining_tp": 1,
20
  "rms_norm_eps": 1e-05,
21
  "rope_scaling": {
22
  "factor": 8.0,
 
23
  "low_freq_factor": 1.0,
24
+ "high_freq_factor": 4.0,
25
  "original_max_position_embeddings": 8192,
26
  "rope_type": "llama3"
27
  },
28
  "rope_theta": 500000.0,
29
  "tie_word_embeddings": false,
30
  "torch_dtype": "bfloat16",
31
+ "transformers_version": "4.43.0.dev0",
 
32
  "use_cache": true,
33
  "vocab_size": 128256
34
  }