saheedniyi committed on
Commit
c99a1c2
·
verified ·
1 Parent(s): 0df6b89

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +50 -29
README.md CHANGED
@@ -118,8 +118,7 @@ torchaudio.save(f"audio.wav", audio, sample_rate=24000)
118
  ```python
119
  !git clone https://github.com/saheedniyi02/yarngpt.git
120
 
121
- # install some necessary libraries
122
- !pip install outetts uroman trafilatura pydub
123
 
124
  import os
125
  import re
@@ -139,77 +138,99 @@ from pydub.effects import normalize
139
  from transformers import AutoModelForCausalLM, AutoTokenizer
140
  from outetts.wav_tokenizer.decoder import WavTokenizer
141
 
142
-
143
  !wget https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml
144
  !wget https://huggingface.co/novateur/WavTokenizer-large-speech-75token/resolve/main/wavtokenizer_large_speech_320_24k.ckpt
145
 
146
- from yarngpt.audiotokenizer import AudioTokenizer
147
 
148
- tokenizer_path="saheedniyi/YarnGPT"
149
  wav_tokenizer_config_path="/content/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml"
150
  wav_tokenizer_model_path = "/content/wavtokenizer_large_speech_320_24k.ckpt"
151
 
152
-
153
-
154
- audio_tokenizer=AudioTokenizer(
155
  tokenizer_path,wav_tokenizer_model_path,wav_tokenizer_config_path
156
  )
157
 
158
-
159
  model = AutoModelForCausalLM.from_pretrained(tokenizer_path,torch_dtype="auto").to(audio_tokenizer.device)
160
 
161
-
162
  def split_text_into_chunks(text, word_limit=25):
163
- """
164
- Function to split a long web page into reasonable chunks
165
- """
166
  sentences=[sentence.strip() for sentence in text.split('.') if sentence.strip()]
167
  chunks=[]
168
  for sentence in sentences:
169
  chunks.append(".")
170
  sentence_splitted=sentence.split(" ")
171
  num_words=len(sentence_splitted)
172
- start_index=0
173
- if num_words>word_limit:
174
- while start_index<num_words:
175
- end_index=min(num_words,start_index+word_limit)
176
- chunks.append(" ".join(sentence_splitted[start_index:start_index+word_limit]))
177
- start_index=end_index
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  else:
179
  chunks.append(sentence)
180
  return chunks
181
 
182
- #Extracting the content of a webpage
183
- page=requests.get("https://punchng.com/expensive-feud-how-burna-boy-cubana-chief-priests-fight-led-to-dollar-rain/")
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  content=trafilatura.extract(page.text)
185
  chunks=split_text_into_chunks(content)
186
 
187
- #Looping over the chunks and adding creating a large `all_codes` list
188
  all_codes=[]
 
189
  for i,chunk in enumerate(chunks):
190
  print(i)
191
  print("\n")
192
  print(chunk)
193
  if chunk==".":
194
- #add silence for 0.25 seconds if we encounter a full stop
195
- all_codes.extend([453]*20)
196
  else:
197
- prompt=audio_tokenizer.create_prompt(chunk,"chinenye")
 
198
  input_ids=audio_tokenizer.tokenize_prompt(prompt)
199
  output = model.generate(
200
  input_ids=input_ids,
201
  temperature=0.1,
202
  repetition_penalty=1.1,
203
  max_length=4000,
 
204
  )
205
  codes=audio_tokenizer.get_codes(output)
206
  all_codes.extend(codes)
207
 
208
-
209
- # Converting to audio
210
  audio=audio_tokenizer.get_audio(all_codes)
211
  IPython.display.Audio(audio,rate=24000)
212
- torchaudio.save(f"news1.wav", audio, sample_rate=24000)
 
 
 
213
  ```
214
 
215
  ## Model Description
@@ -221,7 +242,7 @@ torchaudio.save(f"news1.wav", audio, sample_rate=24000)
221
  - **Repository:** [YarnGPT Github Repository](https://github.com/saheedniyi02/yarngpt)
222
  - **Paper:** IN PROGRESS.
223
  - **Demo:** 1) [Prompt YarnGPT2 notebook](https://colab.research.google.com/drive/1PYuCSpGZKmUS1nGGzdFWbnuM2t0jP24S?usp=sharing)
224
- 2) [Simple news reader](https://colab.research.google.com/drive/1SsXV08kly1TUJVM_NFpKqQWOZ1gUZpGe?usp=sharing)
225
 
226
 
227
 
 
118
  ```python
119
  !git clone https://github.com/saheedniyi02/yarngpt.git
120
 
121
+ pip install outetts uroman trafilatura pydub
 
122
 
123
  import os
124
  import re
 
138
  from transformers import AutoModelForCausalLM, AutoTokenizer
139
  from outetts.wav_tokenizer.decoder import WavTokenizer
140
 
 
141
  !wget https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml
142
  !wget https://huggingface.co/novateur/WavTokenizer-large-speech-75token/resolve/main/wavtokenizer_large_speech_320_24k.ckpt
143
 
144
+ from yarngpt.audiotokenizer import AudioTokenizerV2
145
 
146
+ tokenizer_path="saheedniyi/YarnGPT2"
147
  wav_tokenizer_config_path="/content/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml"
148
  wav_tokenizer_model_path = "/content/wavtokenizer_large_speech_320_24k.ckpt"
149
 
150
+ audio_tokenizer=AudioTokenizerV2(
 
 
151
  tokenizer_path,wav_tokenizer_model_path,wav_tokenizer_config_path
152
  )
153
 
 
154
  model = AutoModelForCausalLM.from_pretrained(tokenizer_path,torch_dtype="auto").to(audio_tokenizer.device)
155
 
156
+ # Split text into chunks
157
  def split_text_into_chunks(text, word_limit=25):
 
 
 
158
  sentences=[sentence.strip() for sentence in text.split('.') if sentence.strip()]
159
  chunks=[]
160
  for sentence in sentences:
161
  chunks.append(".")
162
  sentence_splitted=sentence.split(" ")
163
  num_words=len(sentence_splitted)
164
+
165
+ if (num_words>word_limit) and (num_words<=word_limit*2):
166
+ chunks.append(" ".join(sentence_splitted[:int(num_words/2)]))
167
+ chunks.append(" ".join(sentence_splitted[int(num_words/2):]))
168
+ elif (num_words>word_limit*2) and (num_words<=word_limit*3):
169
+ chunks.append(" ".join(sentence_splitted[:int(num_words/3)]))
170
+ chunks.append(" ".join(sentence_splitted[int(num_words/3):int(2*num_words/3)]))
171
+ chunks.append(" ".join(sentence_splitted[int(2*num_words/3):]))
172
+ elif (num_words>word_limit*3) and (num_words<=word_limit*4):
173
+ chunks.append(" ".join(sentence_splitted[:int(num_words/4)]))
174
+ chunks.append(" ".join(sentence_splitted[int(num_words/4):word_limit*2]))
175
+ chunks.append(" ".join(sentence_splitted[int(2*num_words/4):int(3*num_words/4)]))
176
+ chunks.append(" ".join(sentence_splitted[int(3*num_words/4):]))
177
+ elif (num_words>word_limit*4) and (num_words<=word_limit*5):
178
+ chunks.append(" ".join(sentence_splitted[:int(num_words/5)]))
179
+ chunks.append(" ".join(sentence_splitted[int(num_words/5):int(2*num_words/5)]))
180
+ chunks.append(" ".join(sentence_splitted[int(2*num_words/5):int(3*num_words/5)]))
181
+ chunks.append(" ".join(sentence_splitted[int(3*num_words/5):int(4*num_words/5)]))
182
+ chunks.append(" ".join(sentence_splitted[int(4*num_words/5):]))
183
  else:
184
  chunks.append(sentence)
185
  return chunks
186
 
187
def speed_change(sound, speed=0.9):
    """Return a copy of ``sound`` re-timed by the factor ``speed``.

    ``sound`` is presumably a pydub ``AudioSegment`` (it must provide
    ``raw_data``, ``frame_rate``, ``_spawn`` and ``set_frame_rate``) --
    TODO confirm against the caller.

    Args:
        sound: The audio object to re-time.
        speed: Multiplier applied to the frame rate before converting back.

    Returns:
        The result of ``set_frame_rate`` on the re-labelled audio, at the
        original frame rate of ``sound``.
    """
    original_rate = sound.frame_rate

    # Same raw samples, but declared to play at a scaled number of samples
    # per second.
    retimed = sound._spawn(
        sound.raw_data,
        overrides={"frame_rate": int(original_rate * speed)},
    )

    # Convert back to the original frame rate so ordinary playback programs,
    # which often only handle standard rates, can play the result.
    return retimed.set_frame_rate(original_rate)
197
+
198
+ #change the url
199
+ url="https://punchng.com/im-not-desperate-for-2027-presidential-ticket-obi/"
200
+
201
+ page=requests.get(url)
202
  content=trafilatura.extract(page.text)
203
  chunks=split_text_into_chunks(content)
204
 
 
205
  all_codes=[]
206
+ # Loop over the chunks, accumulating their audio codes in one large `all_codes` list
207
  for i,chunk in enumerate(chunks):
208
  print(i)
209
  print("\n")
210
  print(chunk)
211
  if chunk==".":
212
+ #add silence for 0.5 seconds if we encounter a full stop
213
+ all_codes.extend([453]*38)
214
  else:
215
+ # Change the language and voice here
216
+ prompt=audio_tokenizer.create_prompt(chunk,lang="english",speaker_name="jude")
217
  input_ids=audio_tokenizer.tokenize_prompt(prompt)
218
  output = model.generate(
219
  input_ids=input_ids,
220
  temperature=0.1,
221
  repetition_penalty=1.1,
222
  max_length=4000,
223
+ #num_beams=5,
224
  )
225
  codes=audio_tokenizer.get_codes(output)
226
  all_codes.extend(codes)
227
 
 
 
228
  audio=audio_tokenizer.get_audio(all_codes)
229
  IPython.display.Audio(audio,rate=24000)
230
+ torchaudio.save(f"news1.wav",
231
+ audio,
232
+ sample_rate=24000,
233
+ )
234
  ```
235
 
236
  ## Model Description
 
242
  - **Repository:** [YarnGPT Github Repository](https://github.com/saheedniyi02/yarngpt)
243
  - **Paper:** IN PROGRESS.
244
  - **Demo:** 1) [Prompt YarnGPT2 notebook](https://colab.research.google.com/drive/1PYuCSpGZKmUS1nGGzdFWbnuM2t0jP24S?usp=sharing)
245
+ 2) [Simple news reader](https://colab.research.google.com/drive/1Ulte8I-A_0vqH7Y7teCkPIflULTHqTc_?usp=sharing)
246
 
247
 
248