# YouTube video transcription in Wolof by Papa Sega

# import whisper
# from pytube import YouTube
# from transformers import pipeline
# import gradio as gr
# import os
# from speechbrain.inference.ASR import EncoderASR
# import gradio as gr

# model = EncoderASR.from_hparams("speechbrain/asr-wav2vec2-dvoice-wolof")

# #model = whisper.load_model("base")
# summarizer = pipeline("summarization")

# def get_audio(url):
#   yt = YouTube(url)
#   video = yt.streams.filter(only_audio=True).first()
#   out_file=video.download(output_path=".")
#   base, ext = os.path.splitext(out_file)
#   new_file = base+'.mp3'
#   os.rename(out_file, new_file)
#   a = new_file
#   return a

# def get_text(url):
#   result = model.transcribe(get_audio(url))
#   return result['text']

# def get_summary(url):
#   article = get_text(url)
#   b = summarizer(article)
#   b = b[0]['summary_text']
#   return b
  
# with gr.Blocks() as demo:
#   gr.Markdown("<h1><center>Youtube video transcription with in wolof by Papa Sega</center></h1>")
#   gr.Markdown("<center>Enter the link of any youtube video to get the transcription of the video and a summary of the video in the form of text.</center>")
#   with gr.Tab('Get the transcription of any Youtube video'):
#     with gr.Row():
#       input_text_1 = gr.Textbox(placeholder='Enter the Youtube video URL', label='URL')
#       output_text_1 = gr.Textbox(placeholder='Transcription of the video', label='Transcription')
#     result_button_1 = gr.Button('Get Transcription')
#   with gr.Tab('Summary of Youtube video'):
#     with gr.Row():
#       input_text = gr.Textbox(placeholder='Enter the Youtube video URL', label='URL')
#       output_text = gr.Textbox(placeholder='Summary text of the Youtube Video', label='Summary')
#     result_button = gr.Button('Get Summary')

#   result_button.click(get_summary, inputs = input_text, outputs = output_text)
#   result_button_1.click(get_text, inputs = input_text_1, outputs = output_text_1)
# demo.launch(debug=True)

from pytube import YouTube
from speechbrain.inference.ASR import EncoderASR
import gradio as gr
import os

# Load the EncoderASR model from the "speechbrain/asr-wav2vec2-dvoice-wolof"
# source. This is a module-level statement, so it runs when the file is loaded.
model = EncoderASR.from_hparams("speechbrain/asr-wav2vec2-dvoice-wolof")

def get_audio(url):
    """Download the audio track of a YouTube video and return its local path.

    The first audio-only stream reported by pytube is downloaded into the
    current working directory and the resulting file is renamed so that its
    extension is ``.mp3``. Only the file name changes; the audio data itself
    is not re-encoded.

    Args:
        url: URL of the YouTube video.

    Returns:
        Path of the renamed audio file.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    yt = YouTube(url)
    video = yt.streams.filter(only_audio=True).first()
    if video is None:
        # first() gives None when the filter matches nothing; fail with a
        # clear message instead of an AttributeError on the next line.
        raise ValueError(f"No audio-only stream found for {url!r}")

    out_file = video.download(output_path=".")
    base, _ = os.path.splitext(out_file)
    new_file = base + '.mp3'
    # os.replace overwrites an existing target on every platform, whereas
    # os.rename fails on Windows when a stale .mp3 from an earlier run exists.
    os.replace(out_file, new_file)
    return new_file

def get_text(url):
    """Transcribe the audio of a YouTube video and return the text.

    The audio is fetched with ``get_audio``, passed to
    ``model.transcribe_file`` and the downloaded file is deleted afterwards,
    whether or not the transcription succeeded.

    Args:
        url: URL of the YouTube video.

    Returns:
        The transcription. If the model result is a string it is returned
        as is; otherwise its ``'text'`` entry is returned, falling back to
        ``'Transcription failed'`` when that key is absent.
    """
    audio_file = get_audio(url)
    try:
        result = model.transcribe_file(audio_file)
    finally:
        # Always remove the temporary audio file, even when transcription
        # raises, so failed requests do not leave downloads behind.
        os.remove(audio_file)

    # Debug output to inspect the structure of 'result'.
    print(f"Transcription result: {result}")

    # A plain string is already the transcription.
    if isinstance(result, str):
        return result

    # Otherwise return the value stored under the 'text' key.
    return result.get('text', 'Transcription failed')

# Gradio UI: a URL textbox feeds get_text and a second textbox shows the result.
demo = gr.Interface(
    fn=get_text,
    title="Youtube Video Transcription in Wolof by Papa Sega",
    description="Enter the link of any Youtube video to get the transcription of the video in Wolof.",
    inputs=gr.Textbox(label='URL', placeholder='Enter the Youtube video URL'),
    outputs=gr.Textbox(label='Transcription', placeholder='Transcription of the video'),
)

# Start the app with debug mode enabled.
demo.launch(debug=True)