File size: 3,445 Bytes
e73bf6b
206cf28
e73bf6b
 
4a894f2
 
 
92f18c3
4a894f2
 
 
 
 
 
e73bf6b
92f18c3
e73bf6b
 
92f18c3
 
e73bf6b
92f18c3
e73bf6b
 
 
92f18c3
e73bf6b
92f18c3
e73bf6b
 
92f18c3
e73bf6b
 
 
 
92f18c3
 
 
 
 
 
 
 
e73bf6b
92f18c3
 
 
 
 
e73bf6b
92f18c3
 
 
 
e73bf6b
 
 
92f18c3
e73bf6b
 
92f18c3
 
 
 
 
 
 
 
e73bf6b
92f18c3
 
 
 
 
 
 
 
 
e73bf6b
92f18c3
 
 
 
e73bf6b
 
92f18c3
e73bf6b
92f18c3
e73bf6b
92f18c3
e73bf6b
206cf28
92f18c3
 
 
206cf28
e73bf6b
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import gradio as gr
import torch
import yt_dlp
import os
import subprocess
import json
from threading import Thread
from transformers import AutoTokenizer, AutoModelForCausalLM
import spaces
from moviepy import editor as mp  # FIXED IMPORT
import time
import langdetect
import uuid


# Load LLM Model
model_path = "Qwen/Qwen2.5-7B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, trust_remote_code=True).cuda()
model.eval()

# Function to generate a unique filename
def generate_unique_filename(extension):
    return f"{uuid.uuid4()}{extension}"

# Function to download audio from a YouTube video
def download_youtube_audio(url):
    output_path = generate_unique_filename(".mp3")
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3'}],
        'outtmpl': output_path,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    return output_path if os.path.exists(output_path) else None

# Function to extract audio from a video file
def extract_audio(video_path):
    video = mp.VideoFileClip(video_path)
    audio_path = generate_unique_filename(".mp3")
    video.audio.write_audiofile(audio_path)
    return audio_path

# Function to transcribe audio using Whisper
def transcribe_audio(audio_path):
    model = whisper.load_model("base")
    result = model.transcribe(audio_path)
    return result["text"]

# Function to summarize text using LLM
def summarize_text(text):
    detected_language = langdetect.detect(text)
    prompt = f"Summarize the following text in 150-300 words in {detected_language}: {text[:300000]}..."
    response, _ = model.chat(tokenizer, prompt, history=[])
    return response

# Gradio function to process YouTube links
def process_youtube(url):
    if not url:
        return "Please enter a YouTube URL.", ""
    audio_path = download_youtube_audio(url)
    if not audio_path:
        return "Error downloading YouTube audio.", ""
    transcription = transcribe_audio(audio_path)
    summary = summarize_text(transcription)
    os.remove(audio_path)
    return transcription, summary

# Gradio function to process uploaded videos
def process_uploaded_video(video):
    if not video:
        return "No video uploaded.", ""
    audio_path = extract_audio(video)
    transcription = transcribe_audio(audio_path)
    summary = summarize_text(transcription)
    os.remove(audio_path)
    return transcription, summary

# Gradio UI
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎥 Video Summarizer")
    
    with gr.Tabs():
        with gr.TabItem("📤 Upload Video"):
            video_input = gr.Video(label="Upload Video")
            video_button = gr.Button("Process Video")
        
        with gr.TabItem("🔗 YouTube Link"):
            url_input = gr.Textbox(label="Enter YouTube URL")
            url_button = gr.Button("Process URL")
    
    with gr.Row():
        transcription_output = gr.Textbox(label="📝 Transcription", lines=10)
        summary_output = gr.Textbox(label="📊 Summary", lines=10)
    
    video_button.click(process_uploaded_video, inputs=[video_input], outputs=[transcription_output, summary_output])
    url_button.click(process_youtube, inputs=[url_input], outputs=[transcription_output, summary_output])

demo.launch()