niobures committed · Commit 73c2a97 (verified) · 1 Parent(s): b066ebf

Orpheus (en, ru)

.gitattributes CHANGED
@@ -37,3 +37,5 @@ de,es,fr,it,nl,pl,pt/Orpheus-Cml/tokenizer.json filter=lfs diff=lfs merge=lfs -t
  es,it/Orpheus-3b-Italian_Spanish-FT-Q8_0.gguf/Orpheus-3b-Italian_Spanish-FT-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
  it/Orpheus-Cml-IT/tokenizer.json filter=lfs diff=lfs merge=lfs -text
  ar/Orpheus-TTS-MediaSpeech-AR/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ en/orpheus_3b_0.1_ft_16bit/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ ru,en/lora_orpheus_gothic/tokenizer.json filter=lfs diff=lfs merge=lfs -text
en/orpheus_3b_0.1_ft_16bit/.gitattributes ADDED
@@ -0,0 +1,36 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
en/orpheus_3b_0.1_ft_16bit/README.md ADDED
@@ -0,0 +1,55 @@
+ ---
+ base_model: unsloth/orpheus-3b-0.1-ft-unsloth-bnb-4bit
+ tags:
+ - text-generation-inference
+ - transformers
+ - unsloth
+ - llama
+ - trl
+ - tts
+ - text-to-speech
+ license: apache-2.0
+ library_name: transformers
+ language:
+ - en
+ datasets:
+ - MrDragonFox/Elise
+ ---
+
+ # Uploaded model
+
+ - **Finetuned by:** Prince-1
+ - **License:** apache-2.0
+ - **Finetuned from model:** unsloth/orpheus-3b-0.1-ft-unsloth-bnb-4bit
+
+ This Llama model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Hugging Face's TRL library.
+
+ [<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)
+
+
+ Orpheus TTS is a state-of-the-art, Llama-based Speech-LLM designed for high-quality, empathetic text-to-speech generation. This model has been finetuned to deliver human-level speech synthesis, achieving exceptional clarity, expressiveness, and real-time streaming performance.
+
+ # Model Details
+
+ ### Model Capabilities
+
+ - **Human-Like Speech**: Natural intonation, emotion, and rhythm superior to SOTA closed-source models
+ - **Zero-Shot Voice Cloning**: Clone voices without prior fine-tuning
+ - **Guided Emotion and Intonation**: Control speech and emotion characteristics with simple tags
+ - **Low Latency**: ~200 ms streaming latency for realtime applications, reducible to ~100 ms with input streaming
+
+
+ ### Model Sources
+
+ - **GitHub Repo:** [https://github.com/canopyai/Orpheus-TTS](https://github.com/canopyai/Orpheus-TTS)
+ - **Blog Post:** [https://canopylabs.ai/model-releases](https://canopylabs.ai/model-releases)
+ - **Colab Inference Notebook:** [notebook link](https://colab.research.google.com/drive/1KhXT56UePPUHhqitJNUxq63k-pQomz3N?usp=sharing)
+
+
+ # Usage
+
+ Check out our Colab ([link to Colab](https://)) or GitHub ([link to GitHub](https://github.com/canopyai/Orpheus-TTS)) for how to run easy inference on our finetuned models.
+
+
+ # Model Misuse
+ Do not use our models for impersonation without consent, misinformation or deception (including fake news or fraudulent calls), or any illegal or harmful activity. By using this model, you agree to follow all applicable laws and ethical guidelines. We disclaim responsibility for any misuse.
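The README above defers to the Colab and GitHub repo for inference. As a minimal local sketch (not the official pipeline), the checkpoint added in this commit can be loaded with `transformers`; the directory path below is an assumption, and turning the generated audio tokens into a waveform still requires the Orpheus-TTS decoder from the GitHub repo:

```python
# Hedged sketch: load the checkpoint added in this commit with transformers.
# Assumes en/orpheus_3b_0.1_ft_16bit/ was downloaded locally (hypothetical path)
# with its LFS payloads resolved. Decoding the generated audio tokens into a
# waveform needs the Orpheus-TTS pipeline linked above and is not shown here.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_dir = "en/orpheus_3b_0.1_ft_16bit"
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForCausalLM.from_pretrained(model_dir, torch_dtype=torch.float16)

inputs = tokenizer("Hello, this is a test.", return_tensors="pt")
# Sampling settings mirror generation_config.json below: temperature=0.6, top_p=0.9.
output = model.generate(**inputs, max_new_tokens=256, do_sample=True,
                        temperature=0.6, top_p=0.9)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```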
en/orpheus_3b_0.1_ft_16bit/config.json ADDED
@@ -0,0 +1,38 @@
+ {
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 128000,
+ "eos_token_id": 128009,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 3072,
+ "initializer_range": 0.02,
+ "intermediate_size": 8192,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "num_attention_heads": 24,
+ "num_hidden_layers": 28,
+ "num_key_value_heads": 8,
+ "pad_token_id": 128004,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 32.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": true,
+ "torch_dtype": "float16",
+ "transformers_version": "4.51.3",
+ "unsloth_fixed": true,
+ "unsloth_version": "2025.4.3",
+ "use_cache": true,
+ "vocab_size": 156940
+ }
en/orpheus_3b_0.1_ft_16bit/generation_config.json ADDED
@@ -0,0 +1,11 @@
+ {
+ "_from_model_config": true,
+ "bos_token_id": 128000,
+ "do_sample": true,
+ "eos_token_id": 128009,
+ "max_length": 131072,
+ "pad_token_id": 128004,
+ "temperature": 0.6,
+ "top_p": 0.9,
+ "transformers_version": "4.51.3"
+ }
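A quick sanity check on the two config files above, as a sketch (the local path is an assumption; the field names come straight from the files):

```python
# Sketch: read config.json and generation_config.json with transformers.
from transformers import AutoConfig, GenerationConfig

model_dir = "en/orpheus_3b_0.1_ft_16bit"  # hypothetical local path
config = AutoConfig.from_pretrained(model_dir)
gen_config = GenerationConfig.from_pretrained(model_dir)

print(config.model_type, config.num_hidden_layers, config.vocab_size)
# expected from the files above: llama 28 156940
print(gen_config.do_sample, gen_config.temperature, gen_config.top_p)
# expected: True 0.6 0.9
```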
en/orpheus_3b_0.1_ft_16bit/model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:498c4853d1830e73eacb62bbd1210fc55681d4043461c1d90b610371ebb1f502
+ size 4991037784
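The three lines above are a Git LFS pointer, not the weights themselves: a spec version, the SHA-256 of the real payload, and its size in bytes. A small sketch for checking a downloaded file against such a pointer (file paths are assumptions; Python 3.9+ for `removeprefix`):

```python
# Sketch: verify a downloaded LFS object against its pointer file.
import hashlib

def verify_lfs_pointer(pointer_path: str, payload_path: str) -> bool:
    # Parse the three "key value" lines of the pointer.
    fields = {}
    with open(pointer_path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])

    # Stream the payload so multi-gigabyte shards never sit in memory at once.
    digest = hashlib.sha256()
    size = 0
    with open(payload_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_oid and size == expected_size
```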
en/orpheus_3b_0.1_ft_16bit/model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b505fc6762674202d2192f615579b7fdf9557f4a50a4fc0a151beb9fbffe43fd
+ size 1610725520
en/orpheus_3b_0.1_ft_16bit/model.safetensors.index.json ADDED
@@ -0,0 +1,261 @@
1
+ {
2
+ "metadata": {
3
+ "total_size": 6601734144
4
+ },
5
+ "weight_map": {
6
+ "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
7
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
8
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
9
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
10
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
11
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
12
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
13
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
14
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
15
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
16
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
17
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
18
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
19
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
20
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
21
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
22
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
23
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
24
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
25
+ "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
26
+ "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
27
+ "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
28
+ "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
29
+ "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
30
+ "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
31
+ "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
32
+ "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
33
+ "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
34
+ "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
35
+ "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
36
+ "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
37
+ "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
38
+ "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
39
+ "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
40
+ "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
41
+ "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
42
+ "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
43
+ "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
44
+ "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
45
+ "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
46
+ "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
47
+ "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
48
+ "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
49
+ "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
50
+ "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
51
+ "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
52
+ "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
53
+ "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
54
+ "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
55
+ "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
56
+ "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
57
+ "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
58
+ "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
59
+ "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
60
+ "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
61
+ "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
62
+ "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
63
+ "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
64
+ "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
65
+ "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
66
+ "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
67
+ "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
68
+ "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
69
+ "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
70
+ "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
71
+ "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
72
+ "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
73
+ "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
74
+ "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
75
+ "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
76
+ "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
77
+ "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
78
+ "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
79
+ "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
80
+ "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
81
+ "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
82
+ "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
83
+ "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
84
+ "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
85
+ "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
86
+ "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
87
+ "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
88
+ "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
89
+ "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
90
+ "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
91
+ "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
92
+ "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
93
+ "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
94
+ "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
95
+ "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
96
+ "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
97
+ "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
98
+ "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
99
+ "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
100
+ "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
101
+ "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
102
+ "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
103
+ "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
104
+ "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
105
+ "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
106
+ "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
107
+ "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
108
+ "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
109
+ "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
110
+ "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
111
+ "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
112
+ "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
113
+ "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
114
+ "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
115
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
116
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
117
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
118
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
119
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
120
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
121
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
122
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
123
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
124
+ "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
125
+ "model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
126
+ "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
127
+ "model.layers.20.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
128
+ "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
129
+ "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
130
+ "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
131
+ "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
132
+ "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
133
+ "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
134
+ "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
135
+ "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
136
+ "model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
137
+ "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
138
+ "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
139
+ "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
140
+ "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
141
+ "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
142
+ "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
143
+ "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
144
+ "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
145
+ "model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
146
+ "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
147
+ "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
148
+ "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
149
+ "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
150
+ "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
151
+ "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
152
+ "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
153
+ "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
154
+ "model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
155
+ "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
156
+ "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
157
+ "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
158
+ "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
159
+ "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
160
+ "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
161
+ "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
162
+ "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
163
+ "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
164
+ "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
165
+ "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
166
+ "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
167
+ "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
168
+ "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
169
+ "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
170
+ "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
171
+ "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
172
+ "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
173
+ "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
174
+ "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
175
+ "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
176
+ "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
177
+ "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
178
+ "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
179
+ "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
180
+ "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
181
+ "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
182
+ "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
183
+ "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
184
+ "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
185
+ "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
186
+ "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
187
+ "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
188
+ "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
189
+ "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
190
+ "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
191
+ "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
192
+ "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
193
+ "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
194
+ "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
195
+ "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
196
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
197
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
198
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
199
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
200
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
201
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
202
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
203
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
204
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
205
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
206
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
207
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
208
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
209
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
210
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
211
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
212
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
213
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
214
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
215
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
216
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
217
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
218
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
219
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
220
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
221
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
222
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
223
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
224
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
225
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
226
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
227
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
228
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
229
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
230
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
231
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
232
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
233
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
234
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
235
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
236
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
237
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
238
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
239
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
240
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
241
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
242
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
243
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
244
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
245
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
246
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
247
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
248
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
249
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
250
+ "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
251
+ "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
252
+ "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
253
+ "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
254
+ "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
255
+ "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
256
+ "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
257
+ "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
258
+ "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
259
+ "model.norm.weight": "model-00002-of-00002.safetensors"
260
+ }
261
+ }
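The index above maps every tensor name to the shard that stores it (`total_size` is the summed byte size across shards). A sketch for resolving and loading a single tensor without opening both shards (paths are assumptions; uses the `safetensors` package):

```python
# Sketch: resolve a tensor's shard via model.safetensors.index.json.
import json
from safetensors import safe_open

model_dir = "en/orpheus_3b_0.1_ft_16bit"  # hypothetical local path
with open(f"{model_dir}/model.safetensors.index.json") as f:
    index = json.load(f)

name = "model.norm.weight"
shard = index["weight_map"][name]  # "model-00002-of-00002.safetensors" per the map above
with safe_open(f"{model_dir}/{shard}", framework="pt") as f:
    tensor = f.get_tensor(name)
print(shard, tuple(tensor.shape))
```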
en/orpheus_3b_0.1_ft_16bit/pytorch_model-00001-of-00002.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:97bfa8d32f37c62a2f5709c9cc1f2ed4079c8aacfcb0d80a68d6e4604fad3739
+ size 4991081429
en/orpheus_3b_0.1_ft_16bit/pytorch_model-00002-of-00002.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:163ba873eced94ef7a380273565287a7d65be02aad87aad7914f4015cc5e9381
+ size 1610743180
en/orpheus_3b_0.1_ft_16bit/pytorch_model.bin.index.json ADDED
@@ -0,0 +1,261 @@
1
+ {
2
+ "metadata": {
3
+ "total_size": 6601734144
4
+ },
5
+ "weight_map": {
6
+ "model.embed_tokens.weight": "pytorch_model-00001-of-00002.bin",
7
+ "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
8
+ "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
9
+ "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
10
+ "model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
11
+ "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
12
+ "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
13
+ "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
14
+ "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
15
+ "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
16
+ "model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
17
+ "model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
18
+ "model.layers.1.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
19
+ "model.layers.1.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
20
+ "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
21
+ "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
22
+ "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
23
+ "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
24
+ "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
25
+ "model.layers.10.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
26
+ "model.layers.10.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
27
+ "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
28
+ "model.layers.10.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
29
+ "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
30
+ "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
31
+ "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
32
+ "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
33
+ "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
34
+ "model.layers.11.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
35
+ "model.layers.11.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
36
+ "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
37
+ "model.layers.11.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
38
+ "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
39
+ "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
40
+ "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
41
+ "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
42
+ "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
43
+ "model.layers.12.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
44
+ "model.layers.12.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
45
+ "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
46
+ "model.layers.12.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
47
+ "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
48
+ "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
49
+ "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
50
+ "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
51
+ "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
52
+ "model.layers.13.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
53
+ "model.layers.13.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
54
+ "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
55
+ "model.layers.13.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
56
+ "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
57
+ "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
58
+ "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
59
+ "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
60
+ "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
61
+ "model.layers.14.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
62
+ "model.layers.14.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
63
+ "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
64
+ "model.layers.14.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
65
+ "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
66
+ "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
67
+ "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
68
+ "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
69
+ "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
70
+ "model.layers.15.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
71
+ "model.layers.15.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
72
+ "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
73
+ "model.layers.15.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
74
+ "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
75
+ "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
76
+ "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
77
+ "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
78
+ "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
79
+ "model.layers.16.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
80
+ "model.layers.16.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
81
+ "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
82
+ "model.layers.16.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
83
+ "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
84
+ "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
85
+ "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
86
+ "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
87
+ "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
88
+ "model.layers.17.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
89
+ "model.layers.17.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
90
+ "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
91
+ "model.layers.17.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
92
+ "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
93
+ "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
94
+ "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
95
+ "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
96
+ "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
97
+ "model.layers.18.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
98
+ "model.layers.18.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
99
+ "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
100
+ "model.layers.18.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
101
+ "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
102
+ "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
103
+ "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
104
+ "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
105
+ "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
106
+ "model.layers.19.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
107
+ "model.layers.19.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
108
+ "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
109
+ "model.layers.19.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
110
+ "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
111
+ "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
112
+ "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
113
+ "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
114
+ "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
115
+ "model.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
116
+ "model.layers.2.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
117
+ "model.layers.2.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
118
+ "model.layers.2.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
119
+ "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
120
+ "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
121
+ "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
122
+ "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
123
+ "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
124
+ "model.layers.20.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
125
+ "model.layers.20.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
126
+ "model.layers.20.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
127
+ "model.layers.20.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
128
+ "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
129
+ "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
130
+ "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
131
+ "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
132
+ "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
133
+ "model.layers.21.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
134
+ "model.layers.21.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
135
+ "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
136
+ "model.layers.21.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
137
+ "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
138
+ "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
139
+ "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
140
+ "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
141
+ "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
142
+ "model.layers.22.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
143
+ "model.layers.22.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
144
+ "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
145
+ "model.layers.22.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
146
+ "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
147
+ "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
148
+ "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
149
+ "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
150
+ "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
151
+ "model.layers.23.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
152
+ "model.layers.23.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
153
+ "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
154
+ "model.layers.23.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
155
+ "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
156
+ "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
157
+ "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
158
+ "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
159
+ "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
160
+ "model.layers.24.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
161
+ "model.layers.24.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
162
+ "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
163
+ "model.layers.24.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
164
+ "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
165
+ "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
166
+ "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
167
+ "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
168
+ "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
169
+ "model.layers.25.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
170
+ "model.layers.25.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
171
+ "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
172
+ "model.layers.25.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
173
+ "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
174
+ "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
175
+ "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
176
+ "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
177
+ "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
178
+ "model.layers.26.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
179
+ "model.layers.26.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
180
+ "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
181
+ "model.layers.26.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
182
+ "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
183
+ "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
184
+ "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
185
+ "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
186
+ "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
187
+ "model.layers.27.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
188
+ "model.layers.27.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
189
+ "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
190
+ "model.layers.27.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
191
+ "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
192
+ "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
193
+ "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
194
+ "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
195
+ "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
196
+ "model.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
197
+ "model.layers.3.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
198
+ "model.layers.3.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
199
+ "model.layers.3.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
200
+ "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
201
+ "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
202
+ "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
203
+ "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
204
+ "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
205
+ "model.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
206
+ "model.layers.4.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
207
+ "model.layers.4.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
208
+ "model.layers.4.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
209
+ "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
210
+ "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
211
+ "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
212
+ "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
213
+ "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
214
+ "model.layers.5.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
215
+ "model.layers.5.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
216
+ "model.layers.5.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
217
+ "model.layers.5.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
218
+ "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
219
+ "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
220
+ "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
221
+ "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
222
+ "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
223
+ "model.layers.6.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
224
+ "model.layers.6.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
225
+ "model.layers.6.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
226
+ "model.layers.6.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
227
+ "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
228
+ "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
229
+ "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
230
+ "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
231
+ "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
232
+ "model.layers.7.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
233
+ "model.layers.7.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
234
+ "model.layers.7.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
235
+ "model.layers.7.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
236
+ "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
237
+ "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
238
+ "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
239
+ "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
240
+ "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
241
+ "model.layers.8.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
242
+ "model.layers.8.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
243
+ "model.layers.8.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
244
+ "model.layers.8.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
245
+ "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
246
+ "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
247
+ "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
248
+ "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
249
+ "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
250
+ "model.layers.9.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
251
+ "model.layers.9.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
252
+ "model.layers.9.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
253
+ "model.layers.9.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
254
+ "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
255
+ "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
256
+ "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
257
+ "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
258
+ "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
259
+ "model.norm.weight": "pytorch_model-00002-of-00002.bin"
260
+ }
261
+ }
en/orpheus_3b_0.1_ft_16bit/source.txt ADDED
@@ -0,0 +1 @@
+ https://huggingface.co/Prince-1/orpheus_3b_0.1_ft_16bit
en/orpheus_3b_0.1_ft_16bit/special_tokens_map.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "additional_special_tokens": [
+ "<|audio|>"
+ ],
+ "bos_token": {
+ "content": "<|begin_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "<|eot_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|finetune_right_pad_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
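A sketch for confirming that these special tokens resolve to the IDs referenced in config.json above (bos 128000, eos 128009, pad 128004); the path is an assumption:

```python
# Sketch: check the special-token IDs against config.json.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("en/orpheus_3b_0.1_ft_16bit")  # hypothetical path
print(tok.bos_token, tok.bos_token_id)  # expect <|begin_of_text|> 128000
print(tok.eos_token, tok.eos_token_id)  # expect <|eot_id|> 128009
print(tok.pad_token, tok.pad_token_id)  # expect <|finetune_right_pad_id|> 128004
```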
en/orpheus_3b_0.1_ft_16bit/tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fc3fecb199b4170636dbfab986d25f628157268d37b861f9cadaca60b1353bce
+ size 22849547
en/orpheus_3b_0.1_ft_16bit/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
ru,en/lora_orpheus_gothic/.gitattributes ADDED
@@ -0,0 +1,36 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
ru,en/lora_orpheus_gothic/README.md ADDED
@@ -0,0 +1,23 @@
+ ---
+ base_model: unsloth/orpheus-3b-0.1-pretrained-unsloth-bnb-4bit
+ tags:
+ - text-generation-inference
+ - transformers
+ - unsloth
+ - llama
+ - trl
+ license: apache-2.0
+ language:
+ - en
+ - ru
+ ---
+
+ # Uploaded model
+
+ - **Developed by:** dkulemin
+ - **License:** apache-2.0
+ - **Finetuned from model:** unsloth/orpheus-3b-0.1-pretrained-unsloth-bnb-4bit
+
+ This Llama model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Hugging Face's TRL library.
+
+ [<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)
ru,en/lora_orpheus_gothic/adapter_config.json ADDED
@@ -0,0 +1,42 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "unsloth/orpheus-3b-0.1-pretrained-unsloth-bnb-4bit",
+ "bias": "none",
+ "corda_config": null,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 64,
+ "lora_bias": false,
+ "lora_dropout": 0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "qalora_group_size": 16,
+ "r": 64,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "gate_proj",
+ "up_proj",
+ "k_proj",
+ "o_proj",
+ "down_proj",
+ "q_proj",
+ "v_proj"
+ ],
+ "target_parameters": null,
+ "task_type": "CAUSAL_LM",
+ "trainable_token_indices": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+ }
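Per the config above, this adapter applies rank-64 LoRA (alpha 64) to all attention and MLP projections of the 4-bit base model. A hedged sketch of attaching it with `peft` (the local adapter path is an assumption; loading the bnb-4bit base requires `bitsandbytes`):

```python
# Sketch: attach the LoRA adapter above to its base model with peft.
from peft import PeftModel
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained(
    "unsloth/orpheus-3b-0.1-pretrained-unsloth-bnb-4bit"  # from adapter_config.json
)
model = PeftModel.from_pretrained(base, "ru,en/lora_orpheus_gothic")  # hypothetical local path
# model = model.merge_and_unload()  # only sensible with a 16-bit base, not bnb-4bit
```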
ru,en/lora_orpheus_gothic/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:40f5d5d1520c8dc619fc6338967d0156557c72b5356ca6a24e59accb9b22731e
+ size 2317553416
ru,en/lora_orpheus_gothic/chat_template.jinja ADDED
@@ -0,0 +1,93 @@
+ {{- bos_token }}
+ {%- if custom_tools is defined %}
+ {%- set tools = custom_tools %}
+ {%- endif %}
+ {%- if not tools_in_user_message is defined %}
+ {%- set tools_in_user_message = true %}
+ {%- endif %}
+ {%- if not date_string is defined %}
+ {%- if strftime_now is defined %}
+ {%- set date_string = strftime_now("%d %b %Y") %}
+ {%- else %}
+ {%- set date_string = "26 Jul 2024" %}
+ {%- endif %}
+ {%- endif %}
+ {%- if not tools is defined %}
+ {%- set tools = none %}
+ {%- endif %}
+
+ {#- This block extracts the system message, so we can slot it into the right place. #}
+ {%- if messages[0]['role'] == 'system' %}
+ {%- set system_message = messages[0]['content']|trim %}
+ {%- set messages = messages[1:] %}
+ {%- else %}
+ {%- set system_message = "" %}
+ {%- endif %}
+
+ {#- System message #}
+ {{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
+ {%- if tools is not none %}
+ {{- "Environment: ipython\n" }}
+ {%- endif %}
+ {{- "Cutting Knowledge Date: December 2023\n" }}
+ {{- "Today Date: " + date_string + "\n\n" }}
+ {%- if tools is not none and not tools_in_user_message %}
+ {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
+ {{- "Do not use variables.\n\n" }}
+ {%- for t in tools %}
+ {{- t | tojson(indent=4) }}
+ {{- "\n\n" }}
+ {%- endfor %}
+ {%- endif %}
+ {{- system_message }}
+ {{- "<|eot_id|>" }}
+
+ {#- Custom tools are passed in a user message with some extra guidance #}
+ {%- if tools_in_user_message and not tools is none %}
+ {#- Extract the first user message so we can plug it in here #}
+ {%- if messages | length != 0 %}
+ {%- set first_user_message = messages[0]['content']|trim %}
+ {%- set messages = messages[1:] %}
+ {%- else %}
+ {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
+ {%- endif %}
+ {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
+ {{- "Given the following functions, please respond with a JSON for a function call " }}
+ {{- "with its proper arguments that best answers the given prompt.\n\n" }}
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
+ {{- "Do not use variables.\n\n" }}
+ {%- for t in tools %}
+ {{- t | tojson(indent=4) }}
+ {{- "\n\n" }}
+ {%- endfor %}
+ {{- first_user_message + "<|eot_id|>"}}
+ {%- endif %}
+
+ {%- for message in messages %}
+ {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
+ {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}
+ {%- elif 'tool_calls' in message %}
+ {%- if not message.tool_calls|length == 1 %}
+ {{- raise_exception("This model only supports single tool-calls at once!") }}
+ {%- endif %}
+ {%- set tool_call = message.tool_calls[0].function %}
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
+ {{- '{"name": "' + tool_call.name + '", ' }}
+ {{- '"parameters": ' }}
+ {{- tool_call.arguments | tojson }}
+ {{- "}" }}
+ {{- "<|eot_id|>" }}
+ {%- elif message.role == "tool" or message.role == "ipython" %}
+ {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
+ {%- if message.content is mapping or message.content is iterable %}
+ {{- message.content | tojson }}
+ {%- else %}
+ {{- message.content }}
+ {%- endif %}
+ {{- "<|eot_id|>" }}
+ {%- endif %}
+ {%- endfor %}
+ {%- if add_generation_prompt %}
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
+ {%- endif %}
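Recent `transformers` versions can pick up a sibling `chat_template.jinja` file automatically, so the Llama-3-style template above can be exercised through the tokenizer; a sketch under that assumption (the local path is also an assumption):

```python
# Sketch: render the chat template above via the tokenizer.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("ru,en/lora_orpheus_gothic")  # hypothetical path
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]
text = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(text)  # <|begin_of_text|><|start_header_id|>system<|end_header_id|>...
```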
ru,en/lora_orpheus_gothic/source.txt ADDED
@@ -0,0 +1 @@
+ https://huggingface.co/dkulemin/lora_orpheus_gothic
ru,en/lora_orpheus_gothic/special_tokens_map.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "additional_special_tokens": [
+ "<|audio|>"
+ ],
+ "bos_token": {
+ "content": "<|begin_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "<|eot_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|finetune_right_pad_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
ru,en/lora_orpheus_gothic/tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fc3fecb199b4170636dbfab986d25f628157268d37b861f9cadaca60b1353bce
+ size 22849547
ru,en/lora_orpheus_gothic/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff