niobures committed
Commit 907609b · verified · 1 parent: 415c15e

Orpheus (hi, hi/en)

Files changed (32)
  1. .gitattributes +5 -0
  2. hi,en/Orpheus-tts-hi/.gitattributes +37 -0
  3. hi,en/Orpheus-tts-hi/README.md +361 -0
  4. hi,en/Orpheus-tts-hi/audio/output_1.wav +3 -0
  5. hi,en/Orpheus-tts-hi/audio/output_2.wav +3 -0
  6. hi,en/Orpheus-tts-hi/chat_template.jinja +93 -0
  7. hi,en/Orpheus-tts-hi/config.json +37 -0
  8. hi,en/Orpheus-tts-hi/generation_config.json +11 -0
  9. hi,en/Orpheus-tts-hi/model-00001-of-00003.safetensors +3 -0
  10. hi,en/Orpheus-tts-hi/model-00002-of-00003.safetensors +3 -0
  11. hi,en/Orpheus-tts-hi/model-00003-of-00003.safetensors +3 -0
  12. hi,en/Orpheus-tts-hi/model.safetensors.index.json +261 -0
  13. hi,en/Orpheus-tts-hi/source.txt +1 -0
  14. hi,en/Orpheus-tts-hi/special_tokens_map.json +20 -0
  15. hi,en/Orpheus-tts-hi/tokenizer.json +3 -0
  16. hi,en/Orpheus-tts-hi/tokenizer_config.json +0 -0
  17. hi,en/new_orpheus/.gitattributes +36 -0
  18. hi,en/new_orpheus/README.md +88 -0
  19. hi,en/new_orpheus/chat_template.jinja +93 -0
  20. hi,en/new_orpheus/config.json +37 -0
  21. hi,en/new_orpheus/generation_config.json +11 -0
  22. hi,en/new_orpheus/model-00001-of-00002.safetensors +3 -0
  23. hi,en/new_orpheus/model-00002-of-00002.safetensors +3 -0
  24. hi,en/new_orpheus/model.safetensors.index.json +262 -0
  25. hi,en/new_orpheus/source.txt +1 -0
  26. hi,en/new_orpheus/special_tokens_map.json +20 -0
  27. hi,en/new_orpheus/tokenizer.json +3 -0
  28. hi,en/new_orpheus/tokenizer_config.json +0 -0
  29. hi/Orpheus-3b-Hindi-FT-Q8_0.gguf/.gitattributes +37 -0
  30. hi/Orpheus-3b-Hindi-FT-Q8_0.gguf/Orpheus-3b-Hindi-FT-Q8_0.gguf +3 -0
  31. hi/Orpheus-3b-Hindi-FT-Q8_0.gguf/README.md +120 -0
  32. hi/Orpheus-3b-Hindi-FT-Q8_0.gguf/source.txt +1 -0
.gitattributes CHANGED
@@ -102,3 +102,8 @@ zh/Orpheus-3b-Chinese-FT-Q8_0.gguf/Orpheus-3b-Chinese-FT-Q8_0.gguf filter=lfs di
102
  fr/Orpheus-3b-French-FT-Q8_0.gguf/Orpheus-3b-French-FT-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
103
  fr/Orpheus-Cml-FR[[:space:]](Mehdi75)/tokenizer.json filter=lfs diff=lfs merge=lfs -text
104
  fr/Orpheus-Cml-FR[[:space:]](kadirnar)/tokenizer.json filter=lfs diff=lfs merge=lfs -text
105
+ hi,en/new_orpheus/tokenizer.json filter=lfs diff=lfs merge=lfs -text
106
+ hi,en/Orpheus-tts-hi/audio/output_1.wav filter=lfs diff=lfs merge=lfs -text
107
+ hi,en/Orpheus-tts-hi/audio/output_2.wav filter=lfs diff=lfs merge=lfs -text
108
+ hi,en/Orpheus-tts-hi/tokenizer.json filter=lfs diff=lfs merge=lfs -text
109
+ hi/Orpheus-3b-Hindi-FT-Q8_0.gguf/Orpheus-3b-Hindi-FT-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
hi,en/Orpheus-tts-hi/.gitattributes ADDED
@@ -0,0 +1,37 @@
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.json filter=lfs diff=lfs merge=lfs -text
37
+ *.wav filter=lfs diff=lfs merge=lfs -text
hi,en/Orpheus-tts-hi/README.md ADDED
@@ -0,0 +1,361 @@
1
+ ---
2
+ license: apache-2.0
3
+ language:
4
+ - en
5
+ - hi
6
+ library_name: transformers
7
+ tags:
8
+ - text-to-speech
9
+ - tts
10
+ - hindi
11
+ - english
12
+ - llama
13
+ - audio
14
+ - speech
15
+ - india
16
+ datasets:
17
+ - proprietary
18
+ pipeline_tag: text-to-speech
19
+ co2_eq_emissions:
20
+ emissions: 0
21
+ source: "Not specified"
22
+ training_type: "unknown"
23
+ geographical_location: "unknown"
24
+ ---
25
+
26
+ # Text to Speech for Indian Languages
27
+
28
+ This is a state-of-the-art neural text-to-speech (TTS) model designed specifically for Indian languages. Built on a Llama architecture backbone, it generates natural, expressive speech in Hindi and English with high quality and ultra-low latency.
29
+
30
+ ## Model Overview
31
+
32
+ The model is a 3B-parameter autoregressive transformer based on the Llama architecture. It synthesizes high-quality speech from Hindi and English text, including code-mixed input, and outputs audio at a 24 kHz sampling rate using the SNAC neural codec.
33
+
34
+ * **Model type:** Autoregressive Transformer
35
+ * **Base Architecture:** Llama (3B parameters)
36
+ * **Languages:** Hindi, English
37
+ * **Audio Codec:** SNAC @ 24kHz
38
+ * **License:** Apache 2.0
39
+ * **Developed by:** [email protected], [email protected]
40
+ * **Model URL:** [https://huggingface.co/SachinTelecmi/Orpheus-tts-hi/tree/main](https://huggingface.co/SachinTelecmi/Orpheus-tts-hi/tree/main)
41
+
42
+ ## Key Features
43
+
44
+ * **Multilingual Support:** Native Hindi and English capabilities with code-mixed support.
45
+ * **Ultra-Fast Inference:** Sub-200ms latency on A100-80GB GPUs.
46
+ * **High-Quality Audio:** 24kHz output with the SNAC neural codec.
47
+ * **Production-Ready:** Optimized for real-world deployment with 4-bit quantization support.
48
+
49
+ ## How to Get Started with the Model
50
+
51
+ ### Installation
52
+
53
+ To use this model, install the `transformers`, `torch`, `torchaudio`, `snac`, `bitsandbytes`, and `soundfile` libraries.
54
+
55
+ ```bash
56
+ pip install transformers torch torchaudio
57
+ pip install snac bitsandbytes soundfile # audio decoding, 4-bit quantization, and WAV output
58
+ ```
59
+
60
+ ### Basic Usage
61
+
62
+ The following Python code demonstrates how to generate speech from text using 4-bit quantization for efficient inference.
63
+
64
+ ```python
65
+ import torch
66
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
67
+ from snac import SNAC
68
+ import soundfile as sf
69
+
70
+ # Model configuration for 4-bit inference
71
+ quantization_config = BitsAndBytesConfig(
72
+ load_in_4bit=True,
73
+ bnb_4bit_quant_type="nf4",
74
+ bnb_4bit_compute_dtype=torch.bfloat16,
75
+ bnb_4bit_use_double_quant=True,
76
+ )
77
+
78
+ # Load model and tokenizer
79
+ model = AutoModelForCausalLM.from_pretrained(
80
+ "SachinTelecmi/Orpheus-tts-hi",
81
+ quantization_config=quantization_config,
82
+ device_map="auto",
83
+ trust_remote_code=True,
84
+ )
85
+ tokenizer = AutoTokenizer.from_pretrained("SachinTelecmi/Orpheus-tts-hi", trust_remote_code=True)
86
+
87
+ # Initialize SNAC decoder
88
+ snac_model = SNAC.from_pretrained("hubertsiuzdak/snac_24khz").eval().cuda()
89
+
90
+ START_OF_SPEECH_TOKEN = 128257  # assumed from the standard Orpheus token layout; used below but missing from the original snippet
+ END_OF_SPEECH_TOKEN = 128258
91
+ START_OF_HUMAN_TOKEN = 128259
92
+ END_OF_HUMAN_TOKEN = 128260
93
+ START_OF_AI_TOKEN = 128261
94
+ END_OF_AI_TOKEN = 128262
95
+ AUDIO_CODE_BASE_OFFSET = 128266
96
+
97
+ # Available speakers
98
+ speakers = None
99
+
100
+ def generate_speech(text, speaker=None, temperature=0.4, top_p=0.9):
101
+ """Generate speech from text using specified speaker voice"""
102
+
103
+ # Prepare the text prompt (this checkpoint uses a single voice, so no speaker token is prepended)
104
+ prompt = f"{text}"
105
+ prompt_tokens = tokenizer.encode(prompt, add_special_tokens=False)
106
+
107
+ input_tokens = [
108
+ START_OF_HUMAN_TOKEN,
109
+ *prompt_tokens,
110
+ END_OF_HUMAN_TOKEN,
111
+ START_OF_AI_TOKEN,
112
+ START_OF_SPEECH_TOKEN
113
+ ]
114
+
115
+ input_ids = torch.tensor([input_tokens], device=model.device)
116
+
117
+ # Calculate max tokens based on text length
118
+ max_tokens = min(int(len(text) * 1.3) * 7 + 21, 700)
119
+
120
+ # Generate audio tokens
121
+ with torch.no_grad():
122
+ output = model.generate(
123
+ input_ids,
124
+ max_new_tokens=max_tokens,
125
+ do_sample=True,
126
+ temperature=temperature,
127
+ top_p=top_p,
128
+ repetition_penalty=1.05,
129
+ pad_token_id=tokenizer.pad_token_id,
130
+ eos_token_id=[END_OF_SPEECH_TOKEN, END_OF_AI_TOKEN]
131
+ )
132
+
133
+ # Extract SNAC tokens
134
+ generated_ids = output[0][len(input_tokens):].tolist()
135
+ snac_tokens = [
136
+ token_id for token_id in generated_ids
137
+ if AUDIO_CODE_BASE_OFFSET <= token_id < (AUDIO_CODE_BASE_OFFSET + 7 * 4096)
138
+ ]
139
+
140
+ if not snac_tokens:
141
+ raise ValueError("No audio tokens generated")
142
+
143
+ # Decode audio
144
+ audio = decode_snac_tokens(snac_tokens, snac_model)
145
+ return audio
146
+
147
+ def decode_snac_tokens(snac_tokens, snac_model):
148
+ """De-interleave and decode SNAC tokens to audio"""
149
+ if not snac_tokens or len(snac_tokens) % 7 != 0:
150
+ return None
151
+
152
+ # De-interleave tokens into 3 hierarchical levels
153
+ codes_lvl = [[] for _ in range(3)]
154
+ llm_codebook_offsets = [AUDIO_CODE_BASE_OFFSET + i * 4096 for i in range(7)]
155
+
156
+ for i in range(0, len(snac_tokens), 7):
157
+ # Level 0: Coarse (1 token)
158
+ codes_lvl[0].append(snac_tokens[i] - llm_codebook_offsets[0])
159
+ # Level 1: Medium (2 tokens)
160
+ codes_lvl[1].append(snac_tokens[i+1] - llm_codebook_offsets[1])
161
+ codes_lvl[1].append(snac_tokens[i+4] - llm_codebook_offsets[4])
162
+ # Level 2: Fine (4 tokens)
163
+ codes_lvl[2].append(snac_tokens[i+2] - llm_codebook_offsets[2])
164
+ codes_lvl[2].append(snac_tokens[i+3] - llm_codebook_offsets[3])
165
+ codes_lvl[2].append(snac_tokens[i+5] - llm_codebook_offsets[5])
166
+ codes_lvl[2].append(snac_tokens[i+6] - llm_codebook_offsets[6])
167
+
168
+ # Convert to tensors for SNAC decoder
169
+ hierarchical_codes = []
170
+ for lvl_codes in codes_lvl:
171
+ tensor = torch.tensor(lvl_codes, dtype=torch.int32, device=snac_model.device).unsqueeze(0)
172
+ if torch.any((tensor < 0) | (tensor > 4095)):
173
+ raise ValueError("Invalid SNAC token values")
174
+ hierarchical_codes.append(tensor)
175
+
176
+ # Decode with SNAC
177
+ with torch.no_grad():
178
+ audio_hat = snac_model.decode(hierarchical_codes)
179
+
180
+ return audio_hat.squeeze().clamp(-1, 1).cpu().numpy()
181
+
182
+
183
+
184
+
185
+
186
+ # --- Example Usage ---
187
+
188
+ # code-mixed
189
+ prompt ='''Delhi की एक retail chain ने हमारे solutions से अपनी sales में 30% तक वृद्धि देखी है। <hmm..> उनका feedback बहुत encouraging रहा है ।'''
190
+ audio = generate_speech(prompt)
191
+ sf.write("output_1.wav", audio, 24000)
192
+
193
+ prompt = '''जी हाँ, हमारे pricing plans काफी flexible हैं <breath>
194
+ आप pay as you go या fixed subscription में से choose कर सकते हैं, '''
195
+ audio = generate_speech(prompt)
196
+ sf.write("output_2.wav", audio, 24000)
197
+
198
+ ```
199
+
200
+ #### Streaming Inference Example
201
+ 1. Clone this repo
202
+ ```bash
203
+ git clone https://github.com/telecmi/Orpheus-TTS
204
+ ```
205
+ 2. Navigate and install packages
206
+ ```bash
207
+ cd Orpheus-TTS && pip install orpheus-speech # uses vllm under the hood for fast inference
208
+ ```
209
+ vllm pushed a slightly buggy release on March 18th, so if you run into issues, revert with `pip install vllm==0.7.3` after `pip install orpheus-speech`.
210
+
211
+
212
+ 3. Run the example below:
213
+ ```python
214
+ from orpheus_tts import OrpheusModel
215
+ import wave
216
+ import time
217
+
218
+ ## checkpoints folder from Hugging Face
219
+ ## https://huggingface.co/SachinTelecmi/Orpheus-tts-hi
220
+
221
+ model = OrpheusModel(model_name="checkpoints", max_model_len=2048)
222
+ prompt ='''Delhi की एक retail chain ने हमारे solutions से अपनी sales में 30% तक वृद्धि देखी है। <hmm..> उनका feedback बहुत encouraging रहा है ।'''
223
+ filename = "prompt-hi.wav"
224
+ start_time = time.monotonic()
225
+ syn_tokens = model.generate_speech(
226
+ prompt=prompt,
227
+ voice=None,
228
+ )
229
+
230
+ with wave.open(filename, "wb") as wf:
231
+ wf.setnchannels(1)
232
+ wf.setsampwidth(2)
233
+ wf.setframerate(24000)
234
+
235
+ total_frames = 0
236
+ chunk_counter = 0
237
+ for audio_chunk in syn_tokens: # output streaming
238
+ chunk_counter += 1
239
+ frame_count = len(audio_chunk) // (wf.getsampwidth() * wf.getnchannels())
240
+ total_frames += frame_count
241
+ wf.writeframes(audio_chunk)
242
+ duration = total_frames / wf.getframerate()
243
+
244
+ end_time = time.monotonic()
245
+ print(f"It took {end_time - start_time} seconds to generate {duration:.2f} seconds of audio")
246
+ Audio(filename)  # notebook playback; requires `from IPython.display import Audio`
247
+
248
+ # inference script is in Orpheus-TTS/realtime_streaming_example/streaming.py
249
+
250
+ ```
251
+ ### Samples
252
+ [▶️ Listen: Output_1.wav](https://huggingface.co/SachinTelecmi/Orpheus-tts-hi/resolve/main/audio/output_1.wav) <br>
253
+
254
+ [▶️ Listen: Output_2.wav](https://huggingface.co/SachinTelecmi/Orpheus-tts-hi/resolve/main/audio/output_2.wav)
255
+
256
+ # Voice Cloning Example
257
+ ```python
258
+ # check https://github.com/telecmi/Orpheus-TTS/tree/main/voice_clone/clone.py script
259
+
260
+ from voice_clone import OrpheusTTSVoiceClone
261
+ from pathlib import Path
262
+
263
+ voice_cloner = OrpheusTTSVoiceClone(model_name="SachinTelecmi/Orpheus-tts-hi", device="cuda")
264
+
265
+ # Text to synthesize
266
+ target_texts = [
267
+ "Hi IIT madras is currently doing great for indian research and its proud to be associated with it."
268
+ ]
269
+
270
+ reference_pairs = [(".voice_clone/input_reference.wav",
271
+ "Delhi की एक retail chain ने हमारे solutions से अपनी sales में 30% तक वृद्धि देखी है। <hmm..> उनका feedback बहुत encouraging रहा है ।")]
272
+ # Process each reference
273
+ for audio_path, transcript in reference_pairs:
274
+ print(f"Processing reference: {audio_path} - {transcript}")
275
+
276
+ # Clone voice
277
+ cloned_audio = voice_cloner.clone_voice(audio_path, transcript, target_texts)
278
+
279
+ # Prepare output paths
280
+ audio_stem = Path(audio_path).stem
281
+ output_dir = Path(audio_path).parent / "inference"
282
+ output_paths = [
283
+ str(output_dir / f"{audio_stem}_{i}.wav")
284
+ for i in range(len(target_texts))
285
+ ]
286
+
287
+ # Save cloned audio
288
+ voice_cloner.save_audio(cloned_audio, output_paths)
289
+
290
+ ```
291
+
292
+
293
+
294
+ ## Uses
295
+
296
+ The model is ideal for a wide range of applications requiring high-quality, low-latency speech synthesis for Indian languages, including:
297
+
298
+ * **Accessibility:** Screen readers and voice-enabled assistance for visually impaired users.
299
+ * **Customer Service:** IVR systems, voice bots, and automated announcements.
300
+ * **Content Creation:** Dubbing for videos, e-learning materials, and audiobooks.
301
+ * **Automotive:** In-car navigation and infotainment systems.
302
+ * **Edge Devices:** Voice-enabled smart devices and IoT applications.
303
+ * **Real-Time Streaming:** Supports real-time streaming with vLLM; time to first byte (TTFB) is around 200 ms on an A100.
304
+
305
+ ## Technical Improvements
306
+ * To achieve ultra-low latency, the key step is warming up the SNAC decoder, which is the main bottleneck. When serving with vLLM, warm up every module before handling traffic to get the best performance (a minimal warm-up sketch is shown below).
307
+
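+ As a rough illustration, warming up can be as simple as pushing a few frames of dummy codes through the SNAC decoder once at startup, so the first real request does not pay the kernel-initialization cost. This is a minimal sketch built around the `snac_model` from the usage example above; the helper name, frame count, and the idea of calling it at server startup are assumptions, not part of the released code.
+
+ ```python
+ import torch
+ from snac import SNAC
+
+ snac_model = SNAC.from_pretrained("hubertsiuzdak/snac_24khz").eval().cuda()
+
+ def warm_up_snac(snac_model, n_frames: int = 16):
+     """Run one dummy decode so CUDA kernels and buffers are initialized before real traffic."""
+     # SNAC @ 24 kHz uses three code levels; level 1 carries 2x and level 2 carries 4x
+     # the number of frames of level 0. Code index 0 is a valid entry in every codebook.
+     dummy_codes = [
+         torch.zeros(1, n_frames, dtype=torch.int32, device="cuda"),
+         torch.zeros(1, n_frames * 2, dtype=torch.int32, device="cuda"),
+         torch.zeros(1, n_frames * 4, dtype=torch.int32, device="cuda"),
+     ]
+     with torch.no_grad():
+         snac_model.decode(dummy_codes)
+
+ warm_up_snac(snac_model)  # call once at startup, before serving requests
+ ```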
308
+ ### Architecture
309
+
310
+ The model leverages a 3B-parameter transformer-based architecture with several key innovations:
311
+
312
+ * **Base Architecture:** Llama-style autoregressive transformer (3B parameters)
313
+ * **Audio Codec:** SNAC (24kHz) for high-quality audio token generation
314
+ * **Speaker Conditioning:** Special non-speech tokens (`<hmm..>`, `<breath>`, `<think>`, etc.)
315
+ * **Parameter-Efficient Training:** LoRA adaptation with differentiated ranks for attention and FFN modules.
316
+ * **Context Length:** 4096 tokens
317
+
318
+ ### Training
319
+
320
+ #### Training Infrastructure
321
+
322
+ * **Hardware:** 1× NVIDIA A100 80GB GPU
323
+ * **Precision:** BF16 mixed-precision training with gradient checkpointing via the Unsloth library
324
+ * **Memory Optimization:** 4-bit quantization
325
+
326
+ #### Training Configuration
327
+ Full fine-tuning.
328
+
329
+ #### Training Data
330
+
331
+ It was trained on **proprietary, high-quality datasets** specifically curated for Indian language TTS.
332
+
333
+ * **Data Volume:** 4,000 audio utterances from a single speaker
334
+ * **Languages:** Native Hindi and English utterances with code-mixed support
335
+ * **Speaker Diversity:** 1 professional voice artist with distinct characteristics
336
+ * **Audio Quality:** Studio-grade recordings at 24kHz sampling rate
337
+ * **Content Diversity:** Conversational, narrative, expressive, and informational styles
338
+
339
+ **Note:** The training datasets are proprietary and not publicly available.
341
+
342
+ ## Risks, Limitations and Biases
343
+
344
+ * **Language Support:** Currently supports only Hindi and English. Performance on other Indian languages is not guaranteed.
345
+ * **Speaker Diversity:** Limited to 1 speaker voice, which may not represent the full diversity of Indian accents and dialects.
346
+ * **Hardware Requirements:** Requires a GPU for real-time or near-real-time inference. CPU performance will be significantly slower.
347
+ * **Input Length:** The model is limited to a maximum input length of 2048 tokens.
348
+ * **Bias:** The model's performance and voice characteristics are a reflection of the proprietary training data. It may exhibit biases present in the data.
349
+
350
+ ## Future Updates
351
+
352
+ We are actively working on expanding the model's capabilities:
353
+
354
+ * Support for Odia, Tamil, Telugu, Bengali, Marathi, and other Indian languages.
355
+ * Additional speaker voices with regional accents.
356
+ * Emotion and prosody control tokens.
357
+ * CPU optimization for edge deployment.
358
+ * Serving with the TensorRT-LLM engine (ongoing).
359
+
360
+ ## Acknowledgments
361
+ This project builds on [unsloth/orpheus-3b-0.1-ft](https://huggingface.co/unsloth/orpheus-3b-0.1-ft) by Unsloth.
hi,en/Orpheus-tts-hi/audio/output_1.wav ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3c9c532c7bdf66ed3698d92653ba37f61b4d45d21ce21d87f4e7398ab1f8a12
3
+ size 524332
hi,en/Orpheus-tts-hi/audio/output_2.wav ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65cb88b1aa616493d272cc0fd770b5e27ab95b16b6590aa164be7425cd9d1e25
3
+ size 385068
hi,en/Orpheus-tts-hi/chat_template.jinja ADDED
@@ -0,0 +1,93 @@
1
+ {{- bos_token }}
2
+ {%- if custom_tools is defined %}
3
+ {%- set tools = custom_tools %}
4
+ {%- endif %}
5
+ {%- if not tools_in_user_message is defined %}
6
+ {%- set tools_in_user_message = true %}
7
+ {%- endif %}
8
+ {%- if not date_string is defined %}
9
+ {%- if strftime_now is defined %}
10
+ {%- set date_string = strftime_now("%d %b %Y") %}
11
+ {%- else %}
12
+ {%- set date_string = "26 Jul 2024" %}
13
+ {%- endif %}
14
+ {%- endif %}
15
+ {%- if not tools is defined %}
16
+ {%- set tools = none %}
17
+ {%- endif %}
18
+
19
+ {#- This block extracts the system message, so we can slot it into the right place. #}
20
+ {%- if messages[0]['role'] == 'system' %}
21
+ {%- set system_message = messages[0]['content']|trim %}
22
+ {%- set messages = messages[1:] %}
23
+ {%- else %}
24
+ {%- set system_message = "" %}
25
+ {%- endif %}
26
+
27
+ {#- System message #}
28
+ {{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
29
+ {%- if tools is not none %}
30
+ {{- "Environment: ipython\n" }}
31
+ {%- endif %}
32
+ {{- "Cutting Knowledge Date: December 2023\n" }}
33
+ {{- "Today Date: " + date_string + "\n\n" }}
34
+ {%- if tools is not none and not tools_in_user_message %}
35
+ {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
36
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
37
+ {{- "Do not use variables.\n\n" }}
38
+ {%- for t in tools %}
39
+ {{- t | tojson(indent=4) }}
40
+ {{- "\n\n" }}
41
+ {%- endfor %}
42
+ {%- endif %}
43
+ {{- system_message }}
44
+ {{- "<|eot_id|>" }}
45
+
46
+ {#- Custom tools are passed in a user message with some extra guidance #}
47
+ {%- if tools_in_user_message and not tools is none %}
48
+ {#- Extract the first user message so we can plug it in here #}
49
+ {%- if messages | length != 0 %}
50
+ {%- set first_user_message = messages[0]['content']|trim %}
51
+ {%- set messages = messages[1:] %}
52
+ {%- else %}
53
+ {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
54
+ {%- endif %}
55
+ {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
56
+ {{- "Given the following functions, please respond with a JSON for a function call " }}
57
+ {{- "with its proper arguments that best answers the given prompt.\n\n" }}
58
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
59
+ {{- "Do not use variables.\n\n" }}
60
+ {%- for t in tools %}
61
+ {{- t | tojson(indent=4) }}
62
+ {{- "\n\n" }}
63
+ {%- endfor %}
64
+ {{- first_user_message + "<|eot_id|>"}}
65
+ {%- endif %}
66
+
67
+ {%- for message in messages %}
68
+ {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
69
+ {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}
70
+ {%- elif 'tool_calls' in message %}
71
+ {%- if not message.tool_calls|length == 1 %}
72
+ {{- raise_exception("This model only supports single tool-calls at once!") }}
73
+ {%- endif %}
74
+ {%- set tool_call = message.tool_calls[0].function %}
75
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
76
+ {{- '{"name": "' + tool_call.name + '", ' }}
77
+ {{- '"parameters": ' }}
78
+ {{- tool_call.arguments | tojson }}
79
+ {{- "}" }}
80
+ {{- "<|eot_id|>" }}
81
+ {%- elif message.role == "tool" or message.role == "ipython" %}
82
+ {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
83
+ {%- if message.content is mapping or message.content is iterable %}
84
+ {{- message.content | tojson }}
85
+ {%- else %}
86
+ {{- message.content }}
87
+ {%- endif %}
88
+ {{- "<|eot_id|>" }}
89
+ {%- endif %}
90
+ {%- endfor %}
91
+ {%- if add_generation_prompt %}
92
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
93
+ {%- endif %}
hi,en/Orpheus-tts-hi/config.json ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 128000,
8
+ "eos_token_id": 128001,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 3072,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 8192,
14
+ "max_position_embeddings": 131072,
15
+ "mlp_bias": false,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 24,
18
+ "num_hidden_layers": 28,
19
+ "num_key_value_heads": 8,
20
+ "pad_token_id": 128004,
21
+ "pretraining_tp": 1,
22
+ "rms_norm_eps": 1e-05,
23
+ "rope_scaling": {
24
+ "factor": 32.0,
25
+ "high_freq_factor": 4.0,
26
+ "low_freq_factor": 1.0,
27
+ "original_max_position_embeddings": 8192,
28
+ "rope_type": "llama3"
29
+ },
30
+ "rope_theta": 500000.0,
31
+ "tie_word_embeddings": true,
32
+ "torch_dtype": "bfloat16",
33
+ "transformers_version": "4.54.1",
34
+ "unsloth_version": "2025.8.1",
35
+ "use_cache": true,
36
+ "vocab_size": 156940
37
+ }
hi,en/Orpheus-tts-hi/generation_config.json ADDED
@@ -0,0 +1,11 @@
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 128000,
4
+ "do_sample": true,
5
+ "eos_token_id": 128001,
6
+ "max_length": 131072,
7
+ "pad_token_id": 128004,
8
+ "temperature": 0.6,
9
+ "top_p": 0.9,
10
+ "transformers_version": "4.54.1"
11
+ }
hi,en/Orpheus-tts-hi/model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6aa6f4359897560da74b751b3968ed99b997c31fc6deb6c7884c09a80f8512da
3
+ size 3438608176
hi,en/Orpheus-tts-hi/model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5760f6de43d027b00224eef13f31492e189707157308328c2f62187f1d059ba2
3
+ size 2466558064
hi,en/Orpheus-tts-hi/model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:443996651a974406798cbdf78ee6aa1d2ce0bffc91d577f42294af8fa74ad551
3
+ size 1661186464
hi,en/Orpheus-tts-hi/model.safetensors.index.json ADDED
@@ -0,0 +1,261 @@
1
+ {
2
+ "metadata": {
3
+ "total_size": 13203468288
4
+ },
5
+ "weight_map": {
6
+ "model.embed_tokens.weight": "model-00001-of-00003.safetensors",
7
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors",
8
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
9
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
10
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
11
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
12
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
13
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
14
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
15
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
16
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors",
17
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
18
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
19
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
20
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
21
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
22
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
23
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
24
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
25
+ "model.layers.10.input_layernorm.weight": "model-00002-of-00003.safetensors",
26
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
27
+ "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
28
+ "model.layers.10.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
29
+ "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
30
+ "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
31
+ "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
32
+ "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
33
+ "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
34
+ "model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
35
+ "model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
36
+ "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
37
+ "model.layers.11.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
38
+ "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
39
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
40
+ "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
41
+ "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
42
+ "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
43
+ "model.layers.12.input_layernorm.weight": "model-00002-of-00003.safetensors",
44
+ "model.layers.12.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
45
+ "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
46
+ "model.layers.12.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
47
+ "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
48
+ "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
49
+ "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
50
+ "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
51
+ "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
52
+ "model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors",
53
+ "model.layers.13.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
54
+ "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
55
+ "model.layers.13.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
56
+ "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
57
+ "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
58
+ "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
59
+ "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
60
+ "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
61
+ "model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors",
62
+ "model.layers.14.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
63
+ "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
64
+ "model.layers.14.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
65
+ "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
66
+ "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
67
+ "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
68
+ "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
69
+ "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
70
+ "model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors",
71
+ "model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
72
+ "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
73
+ "model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
74
+ "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
75
+ "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
76
+ "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
77
+ "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
78
+ "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
79
+ "model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors",
80
+ "model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
81
+ "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
82
+ "model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
83
+ "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
84
+ "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
85
+ "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
86
+ "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
87
+ "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
88
+ "model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors",
89
+ "model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
90
+ "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
91
+ "model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
92
+ "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
93
+ "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
94
+ "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
95
+ "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
96
+ "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
97
+ "model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
98
+ "model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
99
+ "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
100
+ "model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
101
+ "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
102
+ "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
103
+ "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
104
+ "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
105
+ "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
106
+ "model.layers.19.input_layernorm.weight": "model-00003-of-00003.safetensors",
107
+ "model.layers.19.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
108
+ "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
109
+ "model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
110
+ "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
111
+ "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
112
+ "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
113
+ "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
114
+ "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
115
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors",
116
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
117
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
118
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
119
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
120
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
121
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
122
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
123
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
124
+ "model.layers.20.input_layernorm.weight": "model-00003-of-00003.safetensors",
125
+ "model.layers.20.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
126
+ "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
127
+ "model.layers.20.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
128
+ "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
129
+ "model.layers.20.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
130
+ "model.layers.20.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
131
+ "model.layers.20.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
132
+ "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
133
+ "model.layers.21.input_layernorm.weight": "model-00003-of-00003.safetensors",
134
+ "model.layers.21.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
135
+ "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
136
+ "model.layers.21.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
137
+ "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
138
+ "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
139
+ "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
140
+ "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
141
+ "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
142
+ "model.layers.22.input_layernorm.weight": "model-00003-of-00003.safetensors",
143
+ "model.layers.22.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
144
+ "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
145
+ "model.layers.22.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
146
+ "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
147
+ "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
148
+ "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
149
+ "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
150
+ "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
151
+ "model.layers.23.input_layernorm.weight": "model-00003-of-00003.safetensors",
152
+ "model.layers.23.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
153
+ "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
154
+ "model.layers.23.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
155
+ "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
156
+ "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
157
+ "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
158
+ "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
159
+ "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
160
+ "model.layers.24.input_layernorm.weight": "model-00003-of-00003.safetensors",
161
+ "model.layers.24.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
162
+ "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
163
+ "model.layers.24.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
164
+ "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
165
+ "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
166
+ "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
167
+ "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
168
+ "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
169
+ "model.layers.25.input_layernorm.weight": "model-00003-of-00003.safetensors",
170
+ "model.layers.25.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
171
+ "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
172
+ "model.layers.25.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
173
+ "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
174
+ "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
175
+ "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
176
+ "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
177
+ "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
178
+ "model.layers.26.input_layernorm.weight": "model-00003-of-00003.safetensors",
179
+ "model.layers.26.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
180
+ "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
181
+ "model.layers.26.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
182
+ "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
183
+ "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
184
+ "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
185
+ "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
186
+ "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
187
+ "model.layers.27.input_layernorm.weight": "model-00003-of-00003.safetensors",
188
+ "model.layers.27.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
189
+ "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
190
+ "model.layers.27.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
191
+ "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
192
+ "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
193
+ "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
194
+ "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
195
+ "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
196
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
197
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
198
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
199
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
200
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
201
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
202
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
203
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
204
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
205
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
206
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
207
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
208
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
209
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
210
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
211
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
212
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
213
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
214
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors",
215
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
216
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
217
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
218
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
219
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
220
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
221
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
222
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
223
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
224
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
225
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
226
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
227
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
228
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
229
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
230
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
231
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
232
+ "model.layers.7.input_layernorm.weight": "model-00002-of-00003.safetensors",
233
+ "model.layers.7.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
234
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
235
+ "model.layers.7.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
236
+ "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
237
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
238
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
239
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
240
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
241
+ "model.layers.8.input_layernorm.weight": "model-00002-of-00003.safetensors",
242
+ "model.layers.8.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
243
+ "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
244
+ "model.layers.8.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
245
+ "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
246
+ "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
247
+ "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
248
+ "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
249
+ "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
250
+ "model.layers.9.input_layernorm.weight": "model-00002-of-00003.safetensors",
251
+ "model.layers.9.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
252
+ "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
253
+ "model.layers.9.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
254
+ "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
255
+ "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
256
+ "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
257
+ "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
258
+ "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
259
+ "model.norm.weight": "model-00003-of-00003.safetensors"
260
+ }
261
+ }
hi,en/Orpheus-tts-hi/source.txt ADDED
@@ -0,0 +1 @@
1
+ https://huggingface.co/SachinTelecmi/Orpheus-tts-hi
hi,en/Orpheus-tts-hi/special_tokens_map.json ADDED
@@ -0,0 +1,20 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|audio|>"
4
+ ],
5
+ "bos_token": {
6
+ "content": "<|begin_of_text|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "eos_token": {
13
+ "content": "<|eot_id|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false
18
+ },
19
+ "pad_token": "<custom_token_7>"
20
+ }
hi,en/Orpheus-tts-hi/tokenizer.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc3fecb199b4170636dbfab986d25f628157268d37b861f9cadaca60b1353bce
3
+ size 22849547
hi,en/Orpheus-tts-hi/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
hi,en/new_orpheus/.gitattributes ADDED
@@ -0,0 +1,36 @@
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
hi,en/new_orpheus/README.md ADDED
@@ -0,0 +1,88 @@
1
+ ---
2
+ license: llama3.2
3
+ base_model: canopylabs/3b-hi-pretrain-research_release
4
+ tags:
5
+ - text-to-speech
6
+ - hindi
7
+ - hinglish
8
+ - audio-generation
9
+ - fine-tuned
10
+ - unsloth
11
+ language:
12
+ - hi
13
+ - en
14
+ pipeline_tag: text-generation
15
+ ---
16
+
17
+ # Hinglish TTS 3B Model
18
+
19
+ This is a fine-tuned version of [canopylabs/3b-hi-pretrain-research_release](https://huggingface.co/canopylabs/3b-hi-pretrain-research_release) specialized for Hinglish (Hindi-English mixed) text-to-speech generation.
20
+
21
+ ## Model Details
22
+
23
+ - **Base Model**: canopylabs/3b-hi-pretrain-research_release
24
+ - **Fine-tuning Method**: LoRA with Unsloth (merged)
25
+ - **Languages**: Hindi, English, Hinglish
26
+ - **Task**: Text-to-Speech via audio token generation
27
+ - **Model Size**: ~3B parameters
28
+
29
+ ## Usage
30
+
31
+ ```python
32
+ from transformers import AutoTokenizer, AutoModelForCausalLM
33
+ import torch
34
+
35
+ # Load model and tokenizer
36
+ model_name = "Itsharshi/new_orpheus"
37
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
38
+ model = AutoModelForCausalLM.from_pretrained(
39
+ model_name,
40
+ torch_dtype=torch.float16,
41
+ device_map="auto"
42
+ )
43
+
44
+ # Generate audio tokens from the prompt
45
+ prompt = "Hello doston, main aapka dost hun"
46
+ inputs = tokenizer(prompt, return_tensors="pt").to(model.device)  # move inputs onto the model's device
47
+ outputs = model.generate(**inputs, max_new_tokens=1200)
48
+ ```
49
+
50
+ ## Fine-tuning Details
51
+
52
+ - **LoRA Rank**: 64
53
+ - **LoRA Alpha**: 64
54
+ - **Target Modules**: q_proj, k_proj, v_proj, o_proj, gate_proj, up_proj, down_proj
55
+ - **Training Framework**: Unsloth (a configuration sketch follows below)
56
+
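+ As a rough illustration only, the hyperparameters above map onto a standard PEFT `LoraConfig` (which Unsloth wraps internally); the actual training script is not included here, so treat this as a sketch of the adapter setup rather than the released configuration.
+
+ ```python
+ from peft import LoraConfig
+
+ # Hypothetical reconstruction of the adapter configuration from the values listed above.
+ lora_config = LoraConfig(
+     r=64,                 # LoRA rank
+     lora_alpha=64,        # LoRA alpha
+     target_modules=[
+         "q_proj", "k_proj", "v_proj", "o_proj",
+         "gate_proj", "up_proj", "down_proj",
+     ],
+     lora_dropout=0.0,
+     bias="none",
+     task_type="CAUSAL_LM",
+ )
+ ```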
57
+ ## Audio Generation
58
+
59
+ This model generates audio tokens that need to be decoded into a waveform with a SNAC (Multi-Scale Neural Audio Codec) model:
60
+
61
+ ```python
62
+ from snac import SNAC
63
+
64
+ # Load SNAC decoder
65
+ snac_model = SNAC.from_pretrained("hubertsiuzdak/snac_24khz")
66
+
67
+ # Process generated tokens to audio codes and decode
68
+ # (See full implementation in the original training code)
69
+ ```
70
+
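+ A minimal decoding sketch follows. It assumes this fine-tune keeps the standard Orpheus audio-token layout (codes starting at offset 128266, seven codes per frame spread across SNAC's three levels); the helper name and offsets are assumptions, so verify them against the training code before relying on this.
+
+ ```python
+ import torch
+ import soundfile as sf
+
+ AUDIO_CODE_BASE_OFFSET = 128266   # assumed Orpheus audio-token offset
+ CODES_PER_FRAME = 7               # 1 coarse + 2 medium + 4 fine SNAC codes per frame
+
+ def decode_to_wav(generated_ids, snac_model, out_path="output.wav"):
+     # Keep only audio-code tokens and drop any trailing partial frame.
+     codes = [t for t in generated_ids
+              if AUDIO_CODE_BASE_OFFSET <= t < AUDIO_CODE_BASE_OFFSET + CODES_PER_FRAME * 4096]
+     codes = codes[: len(codes) - len(codes) % CODES_PER_FRAME]
+
+     # De-interleave into SNAC's three hierarchical levels (1 + 2 + 4 codes per frame).
+     lvl0, lvl1, lvl2 = [], [], []
+     for i in range(0, len(codes), CODES_PER_FRAME):
+         frame = [c - (AUDIO_CODE_BASE_OFFSET + j * 4096)
+                  for j, c in enumerate(codes[i:i + CODES_PER_FRAME])]
+         lvl0.append(frame[0])
+         lvl1 += [frame[1], frame[4]]
+         lvl2 += [frame[2], frame[3], frame[5], frame[6]]
+
+     levels = [torch.tensor(lvl, dtype=torch.int32).unsqueeze(0) for lvl in (lvl0, lvl1, lvl2)]
+     with torch.no_grad():
+         audio = snac_model.decode(levels)
+     sf.write(out_path, audio.squeeze().clamp(-1, 1).cpu().numpy(), 24000)
+
+ # Example, continuing from the usage snippet above:
+ # decode_to_wav(outputs[0].tolist(), snac_model, "hinglish_sample.wav")
+ ```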
71
+ ## Limitations
72
+
73
+ - Requires SNAC model for audio generation
74
+ - Optimized for Hinglish content
75
+ - May not perform well on pure English or pure Hindi in some cases
76
+
77
+ ## Citation
78
+
79
+ If you use this model, please cite the original base model:
80
+
81
+ ```bibtex
82
+ @misc{canopylabs-3b-hi,
83
+ title={3B Hindi Pretrained Model},
84
+ author={Canopy Labs},
85
+ year={2024},
86
+ url={https://huggingface.co/canopylabs/3b-hi-pretrain-research_release}
87
+ }
88
+ ```
hi,en/new_orpheus/chat_template.jinja ADDED
@@ -0,0 +1,93 @@
1
+ {{- bos_token }}
2
+ {%- if custom_tools is defined %}
3
+ {%- set tools = custom_tools %}
4
+ {%- endif %}
5
+ {%- if not tools_in_user_message is defined %}
6
+ {%- set tools_in_user_message = true %}
7
+ {%- endif %}
8
+ {%- if not date_string is defined %}
9
+ {%- if strftime_now is defined %}
10
+ {%- set date_string = strftime_now("%d %b %Y") %}
11
+ {%- else %}
12
+ {%- set date_string = "26 Jul 2024" %}
13
+ {%- endif %}
14
+ {%- endif %}
15
+ {%- if not tools is defined %}
16
+ {%- set tools = none %}
17
+ {%- endif %}
18
+
19
+ {#- This block extracts the system message, so we can slot it into the right place. #}
20
+ {%- if messages[0]['role'] == 'system' %}
21
+ {%- set system_message = messages[0]['content']|trim %}
22
+ {%- set messages = messages[1:] %}
23
+ {%- else %}
24
+ {%- set system_message = "" %}
25
+ {%- endif %}
26
+
27
+ {#- System message #}
28
+ {{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
29
+ {%- if tools is not none %}
30
+ {{- "Environment: ipython\n" }}
31
+ {%- endif %}
32
+ {{- "Cutting Knowledge Date: December 2023\n" }}
33
+ {{- "Today Date: " + date_string + "\n\n" }}
34
+ {%- if tools is not none and not tools_in_user_message %}
35
+ {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
36
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
37
+ {{- "Do not use variables.\n\n" }}
38
+ {%- for t in tools %}
39
+ {{- t | tojson(indent=4) }}
40
+ {{- "\n\n" }}
41
+ {%- endfor %}
42
+ {%- endif %}
43
+ {{- system_message }}
44
+ {{- "<|eot_id|>" }}
45
+
46
+ {#- Custom tools are passed in a user message with some extra guidance #}
47
+ {%- if tools_in_user_message and not tools is none %}
48
+ {#- Extract the first user message so we can plug it in here #}
49
+ {%- if messages | length != 0 %}
50
+ {%- set first_user_message = messages[0]['content']|trim %}
51
+ {%- set messages = messages[1:] %}
52
+ {%- else %}
53
+ {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
54
+ {%- endif %}
55
+ {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
56
+ {{- "Given the following functions, please respond with a JSON for a function call " }}
57
+ {{- "with its proper arguments that best answers the given prompt.\n\n" }}
58
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
59
+ {{- "Do not use variables.\n\n" }}
60
+ {%- for t in tools %}
61
+ {{- t | tojson(indent=4) }}
62
+ {{- "\n\n" }}
63
+ {%- endfor %}
64
+ {{- first_user_message + "<|eot_id|>"}}
65
+ {%- endif %}
66
+
67
+ {%- for message in messages %}
68
+ {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
69
+ {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}
70
+ {%- elif 'tool_calls' in message %}
71
+ {%- if not message.tool_calls|length == 1 %}
72
+ {{- raise_exception("This model only supports single tool-calls at once!") }}
73
+ {%- endif %}
74
+ {%- set tool_call = message.tool_calls[0].function %}
75
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
76
+ {{- '{"name": "' + tool_call.name + '", ' }}
77
+ {{- '"parameters": ' }}
78
+ {{- tool_call.arguments | tojson }}
79
+ {{- "}" }}
80
+ {{- "<|eot_id|>" }}
81
+ {%- elif message.role == "tool" or message.role == "ipython" %}
82
+ {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
83
+ {%- if message.content is mapping or message.content is iterable %}
84
+ {{- message.content | tojson }}
85
+ {%- else %}
86
+ {{- message.content }}
87
+ {%- endif %}
88
+ {{- "<|eot_id|>" }}
89
+ {%- endif %}
90
+ {%- endfor %}
91
+ {%- if add_generation_prompt %}
92
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
93
+ {%- endif %}
hi,en/new_orpheus/config.json ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 128000,
8
+ "eos_token_id": 128001,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 3072,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 8192,
14
+ "max_position_embeddings": 131072,
15
+ "mlp_bias": false,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 24,
18
+ "num_hidden_layers": 28,
19
+ "num_key_value_heads": 8,
20
+ "pad_token_id": 128004,
21
+ "pretraining_tp": 1,
22
+ "rms_norm_eps": 1e-05,
23
+ "rope_scaling": {
24
+ "factor": 32.0,
25
+ "high_freq_factor": 4.0,
26
+ "low_freq_factor": 1.0,
27
+ "original_max_position_embeddings": 8192,
28
+ "rope_type": "llama3"
29
+ },
30
+ "rope_theta": 500000.0,
31
+ "tie_word_embeddings": true,
32
+ "torch_dtype": "bfloat16",
33
+ "transformers_version": "4.55.2",
34
+ "unsloth_version": "2025.8.6",
35
+ "use_cache": true,
36
+ "vocab_size": 156940
37
+ }
hi,en/new_orpheus/generation_config.json ADDED
@@ -0,0 +1,11 @@
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 128000,
4
+ "do_sample": true,
5
+ "eos_token_id": 128001,
6
+ "max_length": 131072,
7
+ "pad_token_id": 128004,
8
+ "temperature": 0.6,
9
+ "top_p": 0.9,
10
+ "transformers_version": "4.55.2"
11
+ }
hi,en/new_orpheus/model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8dfc8ec28ceca6f6eb66bcace117e362639db6abda7e7baa183fee75c28a9d7
3
+ size 4991037968
hi,en/new_orpheus/model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93526aef86b9b322fcb7e901be3df10cbe5dbf21a30c07b3e4051983a744ec36
3
+ size 1610725592
hi,en/new_orpheus/model.safetensors.index.json ADDED
@@ -0,0 +1,262 @@
1
+ {
2
+ "metadata": {
3
+ "total_parameters": 3300867072,
4
+ "total_size": 6601734144
5
+ },
6
+ "weight_map": {
7
+ "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
8
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
9
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
10
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
11
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
12
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
13
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
14
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
15
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
16
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
17
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
18
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
19
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
20
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
21
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
22
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
23
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
24
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
25
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
26
+ "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
27
+ "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
28
+ "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
29
+ "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
30
+ "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
31
+ "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
32
+ "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
33
+ "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
34
+ "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
35
+ "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
36
+ "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
37
+ "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
38
+ "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
39
+ "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
40
+ "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
41
+ "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
42
+ "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
43
+ "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
44
+ "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
45
+ "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
46
+ "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
47
+ "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
48
+ "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
49
+ "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
50
+ "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
51
+ "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
52
+ "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
53
+ "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
54
+ "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
55
+ "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
56
+ "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
57
+ "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
58
+ "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
59
+ "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
60
+ "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
61
+ "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
62
+ "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
63
+ "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
64
+ "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
65
+ "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
66
+ "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
67
+ "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
68
+ "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
69
+ "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
70
+ "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
71
+ "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
72
+ "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
73
+ "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
74
+ "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
75
+ "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
76
+ "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
77
+ "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
78
+ "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
79
+ "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
80
+ "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
81
+ "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
82
+ "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
83
+ "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
84
+ "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
85
+ "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
86
+ "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
87
+ "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
88
+ "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
89
+ "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
90
+ "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
91
+ "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
92
+ "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
93
+ "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
94
+ "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
95
+ "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
96
+ "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
97
+ "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
98
+ "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
99
+ "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
100
+ "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
101
+ "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
102
+ "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
103
+ "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
104
+ "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
105
+ "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
106
+ "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
107
+ "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
108
+ "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
109
+ "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
110
+ "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
111
+ "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
112
+ "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
113
+ "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
114
+ "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
115
+ "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
116
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
117
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
118
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
119
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
120
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
121
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
122
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
123
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
124
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
125
+ "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
126
+ "model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
127
+ "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
128
+ "model.layers.20.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
129
+ "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
130
+ "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
131
+ "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
132
+ "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
133
+ "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
134
+ "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
135
+ "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
136
+ "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
137
+ "model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
138
+ "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
139
+ "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
140
+ "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
141
+ "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
142
+ "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
143
+ "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
144
+ "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
145
+ "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
146
+ "model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
147
+ "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
148
+ "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
149
+ "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
150
+ "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
151
+ "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
152
+ "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
153
+ "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
154
+ "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
155
+ "model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
156
+ "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
157
+ "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
158
+ "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
159
+ "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
160
+ "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
161
+ "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
162
+ "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
163
+ "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
164
+ "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
165
+ "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
166
+ "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
167
+ "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
168
+ "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
169
+ "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
170
+ "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
171
+ "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
172
+ "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
173
+ "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
174
+ "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
175
+ "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
176
+ "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
177
+ "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
178
+ "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
179
+ "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
180
+ "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
181
+ "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
182
+ "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
183
+ "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
184
+ "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
185
+ "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
186
+ "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
187
+ "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
188
+ "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
189
+ "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
190
+ "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
191
+ "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
192
+ "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
193
+ "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
194
+ "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
195
+ "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
196
+ "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
197
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
198
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
199
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
200
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
201
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
202
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
203
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
204
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
205
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
206
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
207
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
208
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
209
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
210
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
211
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
212
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
213
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
214
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
215
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
216
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
217
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
218
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
219
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
220
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
221
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
222
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
223
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
224
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
225
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
226
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
227
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
228
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
229
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
230
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
231
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
232
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
233
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
234
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
235
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
236
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
237
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
238
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
239
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
240
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
241
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
242
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
243
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
244
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
245
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
246
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
247
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
248
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
249
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
250
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
251
+ "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
252
+ "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
253
+ "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
254
+ "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
255
+ "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
256
+ "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
257
+ "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
258
+ "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
259
+ "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
260
+ "model.norm.weight": "model-00002-of-00002.safetensors"
261
+ }
262
+ }
hi,en/new_orpheus/source.txt ADDED
@@ -0,0 +1 @@
1
+ https://huggingface.co/Itsharshi/new_orpheus
hi,en/new_orpheus/special_tokens_map.json ADDED
@@ -0,0 +1,20 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|audio|>"
4
+ ],
5
+ "bos_token": {
6
+ "content": "<|begin_of_text|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "eos_token": {
13
+ "content": "<|eot_id|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false
18
+ },
19
+ "pad_token": "<|finetune_right_pad_id|>"
20
+ }
hi,en/new_orpheus/tokenizer.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc3fecb199b4170636dbfab986d25f628157268d37b861f9cadaca60b1353bce
3
+ size 22849547
hi,en/new_orpheus/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
hi/Orpheus-3b-Hindi-FT-Q8_0.gguf/.gitattributes ADDED
@@ -0,0 +1,37 @@
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ 3b-hi-ft-research_release-q8_0.gguf filter=lfs diff=lfs merge=lfs -text
37
+ Orpheus-3b-Hindi-FT-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
hi/Orpheus-3b-Hindi-FT-Q8_0.gguf/Orpheus-3b-Hindi-FT-Q8_0.gguf ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9afbc3faabd9470c888f6c548468521f24b04f655dde21f641bade1ac2c6c70c
3
+ size 3516430784
hi/Orpheus-3b-Hindi-FT-Q8_0.gguf/README.md ADDED
@@ -0,0 +1,120 @@
1
+ ---
2
+ language:
3
+ - hi
4
+ tags:
5
+ - text-to-speech
6
+ - tts
7
+ - audio
8
+ - speech-synthesis
9
+ - orpheus
10
+ - gguf
11
+ license: apache-2.0
12
+ datasets:
13
+ - internal
14
+ ---
15
+
16
+ # Orpheus-3b-Hindi-FT-Q8_0
17
+
18
+ This is a quantised version of [canopylabs/3b-hi-ft-research_release](https://huggingface.co/canopylabs/3b-hi-ft-research_release).
19
+
20
+ Orpheus is a high-performance Text-to-Speech model fine-tuned for natural, emotional speech synthesis. This repository hosts the 8-bit quantised version of the 3B parameter model, optimised for efficiency while maintaining high-quality output.
21
+
22
+ ## Model Description
23
+
24
+ **Orpheus-3b-FT-Q8_0** is a 3 billion parameter Text-to-Speech model that converts text inputs into natural-sounding speech with support for multiple voices and emotional expressions. The model has been quantised to 8-bit (Q8_0) format for efficient inference, making it accessible on consumer hardware.
25
+
26
+ Key features:
27
+ - A single distinct voice option (ऋतिका)
28
+ - Support for emotion tags like laughter, sighs, etc.
29
+ - Optimised for CUDA acceleration on RTX GPUs
30
+ - Produces high-quality 24kHz mono audio
31
+ - Fine-tuned for conversational naturalness
32
+
33
+ ## How to Use
34
+
35
+ This model is designed to be used with an LLM inference server that connects to the [Orpheus-FastAPI](https://github.com/Lex-au/Orpheus-FastAPI) frontend, which provides both a web UI and OpenAI-compatible API endpoints.
36
+
37
+ ### Compatible Inference Servers
38
+
39
+ This quantised model can be loaded into any of these LLM inference servers:
40
+
41
+ - [GPUStack](https://github.com/gpustack/gpustack) - GPU optimised LLM inference server (My pick) - supports LAN/WAN tensor split parallelisation
42
+ - [LM Studio](https://lmstudio.ai/) - Load the GGUF model and start the local server
43
+ - [llama.cpp server](https://github.com/ggerganov/llama.cpp) - Run with the appropriate model parameters
44
+ - Any compatible OpenAI API-compatible server
45
+
46
+ ### Quick Start
47
+
48
+ 1. Download this quantised model from [lex-au's Orpheus-FASTAPI collection](https://huggingface.co/collections/lex-au/orpheus-fastapi-67e125ae03fc96dae0517707)
49
+
50
+ 2. Load the model in your preferred inference server and start the server.
51
+
52
+ 3. Clone the Orpheus-FastAPI repository:
53
+ ```bash
54
+ git clone https://github.com/Lex-au/Orpheus-FastAPI.git
55
+ cd Orpheus-FastAPI
56
+ ```
57
+
58
+ 4. Configure the FastAPI server to connect to your inference server by setting the `ORPHEUS_API_URL` environment variable (see the example after this list).
59
+
60
+ 5. Follow the complete installation and setup instructions in the [repository README](https://github.com/Lex-au/Orpheus-FastAPI).
61
+
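+ For step 4, a typical local configuration might look like the following. The URL is a placeholder — use whatever host, port, and completion endpoint your inference server actually exposes:
+
+ ```bash
+ # Placeholder endpoint — point this at your llama.cpp / LM Studio / GPUStack server
+ export ORPHEUS_API_URL="http://127.0.0.1:1234/v1/completions"
+ ```
+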
62
+ ### Available Voices
63
+
64
+ The model supports a single voice:
65
+ - `ऋतिका`: Female, Hindi, expressive
66
+
67
+ ### Emotion Tags
68
+
69
+ You can add expressiveness to speech by inserting tags:
70
+ - `<laugh>`, `<chuckle>`: For laughter sounds
71
+ - `<sigh>`: For sighing sounds
72
+ - `<cough>`, `<sniffle>`: For subtle interruptions
73
+ - `<groan>`, `<yawn>`, `<gasp>`: For additional emotional expression
74
+
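+ For example, an input sentence can mix Hindi text with inline tags (illustrative input only — the voice itself is selected separately in the Orpheus-FastAPI UI or API request):
+
+ ```
+ आज का दिन बहुत लंबा था <sigh>, लेकिन शाम की चाय ने सब ठीक कर दिया <chuckle>
+ ```
+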
75
+ ## Technical Specifications
76
+
77
+ - **Architecture**: Specialised token-to-audio sequence model
78
+ - **Parameters**: ~3 billion
79
+ - **Quantisation**: 8-bit (GGUF Q8_0 format)
80
+ - **Audio Sample Rate**: 24kHz
81
+ - **Input**: Text with optional voice selection and emotion tags
82
+ - **Output**: High-quality WAV audio
83
+ - **Language**: Hindi
84
+ - **Hardware Requirements**: CUDA-compatible GPU (recommended: RTX series)
85
+ - **Integration Method**: External LLM inference server + Orpheus-FastAPI frontend
86
+
87
+ ## Limitations
88
+
89
+ - Currently supports Hindi text only
90
+ - Best performance achieved on CUDA-compatible GPUs
91
+ - Generation speed depends on GPU capability
92
+
93
+ ## License
94
+
95
+ This model is available under the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0).
96
+
97
+ ## Citation & Attribution
98
+
99
+ The original Orpheus model was created by Canopy Labs. This repository contains a quantised version optimised for use with the Orpheus-FastAPI server.
100
+
101
+ If you use this quantised model in your research or applications, please cite:
102
+
103
+ ```
104
+ @misc{orpheus-tts-2025,
105
+ author = {Canopy Labs},
106
+ title = {Orpheus-3b-0.1-ft: Text-to-Speech Model},
107
+ year = {2025},
108
+ publisher = {HuggingFace},
109
+ howpublished = {\url{https://huggingface.co/canopylabs/orpheus-3b-0.1-ft}}
110
+ }
111
+
112
+ @misc{orpheus-quantised-2025,
113
+ author = {Lex-au},
114
+ title = {Orpheus-3b-FT-Q8_0: Quantised TTS Model with FastAPI Server},
115
+ note = {GGUF quantisation of canopylabs/orpheus-3b-0.1-ft},
116
+ year = {2025},
117
+ publisher = {HuggingFace},
118
+ howpublished = {\url{https://huggingface.co/lex-au/Orpheus-3b-FT-Q8_0.gguf}}
119
+ }
120
+ ```
hi/Orpheus-3b-Hindi-FT-Q8_0.gguf/source.txt ADDED
@@ -0,0 +1 @@
1
+ https://huggingface.co/lex-au/Orpheus-3b-Hindi-FT-Q8_0.gguf