saheedniyi
/

YarnGPT2

@@ -118,8 +118,7 @@ torchaudio.save(f"audio.wav", audio, sample_rate=24000)
 ```python
 !git clone https://github.com/saheedniyi02/yarngpt.git
-# install some necessary libraries
-!pip install outetts uroman trafilatura pydub
 import os
 import re
@@ -139,77 +138,99 @@ from pydub.effects import normalize
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from outetts.wav_tokenizer.decoder import WavTokenizer
 !wget https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml
 !wget https://huggingface.co/novateur/WavTokenizer-large-speech-75token/resolve/main/wavtokenizer_large_speech_320_24k.ckpt
-from yarngpt.audiotokenizer import AudioTokenizer
-tokenizer_path="saheedniyi/YarnGPT"
 wav_tokenizer_config_path="/content/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml"
 wav_tokenizer_model_path = "/content/wavtokenizer_large_speech_320_24k.ckpt"
-audio_tokenizer=AudioTokenizer(
     tokenizer_path,wav_tokenizer_model_path,wav_tokenizer_config_path
        )
 model = AutoModelForCausalLM.from_pretrained(tokenizer_path,torch_dtype="auto").to(audio_tokenizer.device)
 def split_text_into_chunks(text, word_limit=25):
-  """
-  Function to split a long web page into reasonable chunks
-  """
   sentences=[sentence.strip() for sentence in text.split('.') if sentence.strip()]
   chunks=[]
   for sentence in sentences:
     chunks.append(".")
     sentence_splitted=sentence.split(" ")
     num_words=len(sentence_splitted)
-    start_index=0
-    if num_words>word_limit:
-      while start_index<num_words:
-        end_index=min(num_words,start_index+word_limit)
-        chunks.append(" ".join(sentence_splitted[start_index:start_index+word_limit]))
-        start_index=end_index
     else:
       chunks.append(sentence)
   return chunks
-#Extracting the content of a webpage
-page=requests.get("https://punchng.com/expensive-feud-how-burna-boy-cubana-chief-priests-fight-led-to-dollar-rain/")
 content=trafilatura.extract(page.text)
 chunks=split_text_into_chunks(content)
-#Looping over the chunks and adding creating a large `all_codes` list
 all_codes=[]
 for i,chunk in enumerate(chunks):
   print(i)
   print("\n")
   print(chunk)
   if chunk==".":
-    #add silence for 0.25 seconds if we encounter a full stop
-    all_codes.extend([453]*20)
   else:
-    prompt=audio_tokenizer.create_prompt(chunk,"chinenye")
     input_ids=audio_tokenizer.tokenize_prompt(prompt)
     output  = model.generate(
             input_ids=input_ids,
             temperature=0.1,
             repetition_penalty=1.1,
             max_length=4000,
         )
     codes=audio_tokenizer.get_codes(output)
     all_codes.extend(codes)
-# Converting to audio
 audio=audio_tokenizer.get_audio(all_codes)
 IPython.display.Audio(audio,rate=24000)
-torchaudio.save(f"news1.wav", audio, sample_rate=24000)
 ```
 ## Model Description
@@ -221,7 +242,7 @@ torchaudio.save(f"news1.wav", audio, sample_rate=24000)
 - **Repository:** [YarnGPT Github Repository](https://github.com/saheedniyi02/yarngpt)
 - **Paper:** IN PROGRESS.
 - **Demo:** 1) [Prompt YarnGPT2 notebook](https://colab.research.google.com/drive/1PYuCSpGZKmUS1nGGzdFWbnuM2t0jP24S?usp=sharing)
-            2) [Simple news reader](https://colab.research.google.com/drive/1SsXV08kly1TUJVM_NFpKqQWOZ1gUZpGe?usp=sharing)

 ```python
 !git clone https://github.com/saheedniyi02/yarngpt.git
+pip install outetts uroman trafilatura pydub
 import os
 import re
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from outetts.wav_tokenizer.decoder import WavTokenizer
 !wget https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml
 !wget https://huggingface.co/novateur/WavTokenizer-large-speech-75token/resolve/main/wavtokenizer_large_speech_320_24k.ckpt
+from yarngpt.audiotokenizer import AudioTokenizerV2
+tokenizer_path="saheedniyi/YarnGPT2"
 wav_tokenizer_config_path="/content/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml"
 wav_tokenizer_model_path = "/content/wavtokenizer_large_speech_320_24k.ckpt"
+audio_tokenizer=AudioTokenizerV2(
     tokenizer_path,wav_tokenizer_model_path,wav_tokenizer_config_path
        )
 model = AutoModelForCausalLM.from_pretrained(tokenizer_path,torch_dtype="auto").to(audio_tokenizer.device)
+# Split text into chunks
 def split_text_into_chunks(text, word_limit=25):
   """Split text into short chunks, one "." marker before every sentence.

   The text is cut into sentences at each full stop. Every non-empty
   sentence contributes a "." entry (the caller turns it into a pause)
   followed by the sentence itself. A sentence with more than `word_limit`
   words is divided into the smallest number of near-equal parts such that
   no part exceeds `word_limit` words, whatever the sentence length.

   Args:
     text: The text to split. `None` or an empty string yields `[]`.
     word_limit: Maximum number of space-separated words per chunk. Values
       below 1 disable splitting, so each sentence is kept whole.

   Returns:
     A list of strings alternating "." markers and text chunks.
   """
   if not text:
     # Nothing to read, e.g. the extractor found no content on the page.
     return []
   chunks = []
   for raw_sentence in text.split('.'):
     sentence = raw_sentence.strip()
     if not sentence:
       continue
     chunks.append(".")
     words = sentence.split(" ")
     num_words = len(words)
     if word_limit < 1 or num_words <= word_limit:
       chunks.append(sentence)
       continue
     # Ceiling division: the fewest parts that keep every part within the limit.
     num_parts = int(-(-num_words // word_limit))
     # Evenly spaced cut points; consecutive pairs never overlap or skip a word.
     bounds = [i * num_words // num_parts for i in range(num_parts + 1)]
     for start, end in zip(bounds, bounds[1:]):
       chunks.append(" ".join(words[start:end]))
   return chunks
+def speed_change(sound, speed=0.9):
+    """Re-label the audio's raw samples with a scaled frame rate, then resample.
+
+    Args:
+        sound: Object exposing `raw_data`, `frame_rate`, `_spawn` and
+            `set_frame_rate` (presumably a pydub AudioSegment; TODO confirm).
+        speed: Factor applied to the frame rate; the product is truncated
+            to an int. Presumably values below 1 slow playback down
+            (confirm against the pydub documentation).
+
+    Returns:
+        The result of `set_frame_rate(sound.frame_rate)` called on the
+        re-labelled audio.
+    """
+    # Manually override the frame_rate. This tells the computer how many
+    # samples to play per second
+    sound_with_altered_frame_rate = sound._spawn(sound.raw_data, overrides={
+         "frame_rate": int(sound.frame_rate * speed)
+      })
+    # Convert the sound with the altered frame rate back to the original
+    # frame rate so that regular playback programs will work right. They
+    # often only know how to play audio at a standard frame rate (like 44.1k)
+    return sound_with_altered_frame_rate.set_frame_rate(sound.frame_rate)
+# Replace this URL with the web page whose text should be converted to audio
+url="https://punchng.com/im-not-desperate-for-2027-presidential-ticket-obi/"
+page=requests.get(url)
 content=trafilatura.extract(page.text)
 chunks=split_text_into_chunks(content)
 all_codes=[]
+# Loop over the chunks, appending each chunk's codes to one large `all_codes` list
 for i,chunk in enumerate(chunks):
   print(i)
   print("\n")
   print(chunk)
   if chunk==".":
+    #add silence for 0.5 seconds if we encounter a full stop
+    all_codes.extend([453]*38)
   else:
+    # Change the language and voice here
+    prompt=audio_tokenizer.create_prompt(chunk,lang="english",speaker_name="jude")
     input_ids=audio_tokenizer.tokenize_prompt(prompt)
     output  = model.generate(
             input_ids=input_ids,
             temperature=0.1,
             repetition_penalty=1.1,
             max_length=4000,
+            #num_beams=5,
         )
     codes=audio_tokenizer.get_codes(output)
     all_codes.extend(codes)
 audio=audio_tokenizer.get_audio(all_codes)
 IPython.display.Audio(audio,rate=24000)
+torchaudio.save(f"news1.wav",
+                audio,
+                sample_rate=24000,
+)
 ```
 ## Model Description
 - **Repository:** [YarnGPT Github Repository](https://github.com/saheedniyi02/yarngpt)
 - **Paper:** IN PROGRESS.
 - **Demo:** 1) [Prompt YarnGPT2 notebook](https://colab.research.google.com/drive/1PYuCSpGZKmUS1nGGzdFWbnuM2t0jP24S?usp=sharing)
+            2) [Simple news reader](https://colab.research.google.com/drive/1Ulte8I-A_0vqH7Y7teCkPIflULTHqTc_?usp=sharing)