File size: 2,287 Bytes
65134ea
f276112
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65134ea
f276112
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65134ea
f276112
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import os
import gradio as gr
from groq import Groq

# Read the Groq API key from the environment and fail fast with a clear
# message. The check runs BEFORE the client is built so that a missing key is
# always reported by this ValueError rather than by whatever Groq() does when
# handed api_key=None.
api_key = os.getenv("GROQ_API_KEY")
if not api_key:
    raise ValueError("API key not found. Please set the GROQ_API_KEY environment variable.")

# Module-level client shared by the transcription and chat-completion calls.
client = Groq(api_key=api_key)

def transcribe_audio(file_path):
    """Transcribe the audio file at ``file_path`` and return its text.

    The file is opened in binary mode, read fully into memory, and sent to
    the module-level Groq ``client`` with the ``whisper-large-v3`` model and
    the ``verbose_json`` response format. Only the ``text`` attribute of the
    response is returned.
    """
    with open(file_path, "rb") as audio:
        # The upload is a (name, bytes) pair; the path doubles as the name.
        payload = (file_path, audio.read())
        result = client.audio.transcriptions.create(
            file=payload,
            model="whisper-large-v3",
            response_format="verbose_json",
        )
    return result.text

def get_chat_completion(prompt):
    """Send ``prompt`` as a single user message and return the full reply.

    The completion is requested in streaming mode from the module-level Groq
    ``client``; the streamed pieces are concatenated into one string before
    returning, so the caller never sees partial output.
    """
    user_message = {"role": "user", "content": prompt}
    stream = client.chat.completions.create(
        model="llama-3.2-11b-text-preview",
        messages=[user_message],
        temperature=1,
        max_tokens=1024,
        top_p=1,
        stream=True,
        stop=None,
    )

    # A chunk whose delta content is falsy (e.g. None) contributes nothing.
    return "".join(piece.choices[0].delta.content or "" for piece in stream)

def process_input(audio_file, text_input, chat_history):
    """Handle one chat turn coming from either an audio clip or typed text.

    Args:
        audio_file: Path to an audio file, or None when no audio was given.
            When present it takes precedence over ``text_input``.
        text_input: Typed prompt; used only when ``audio_file`` is None.
        chat_history: List of ``(role, content)`` tuples for this session.
            It is extended in place. ``None`` is treated as an empty history.

    Returns:
        A 4-tuple of: the history rendered as text, a ``gr.update`` setting
        an audio component's value to None, a ``gr.update`` setting a textbox
        value to ``''``, and the (possibly extended) ``chat_history`` list.
    """
    if chat_history is None:
        chat_history = []

    if audio_file is not None:
        prompt = transcribe_audio(audio_file)
    else:
        prompt = text_input

    # Only call the model when there is something to say: a blank or missing
    # prompt would otherwise be sent as an empty user message and recorded as
    # an empty turn in the history.
    if prompt and prompt.strip():
        reply = get_chat_completion(prompt)
        chat_history.append(("👤", prompt))
        chat_history.append(("🤖", reply))

    # Each entry ends with its own newline and entries are joined with
    # another, leaving a blank line between turns.
    formatted_history = "\n".join(f"{role}: {content}\n" for role, content in chat_history)

    return formatted_history, gr.update(value=None), gr.update(value=''), chat_history

# Build the Gradio interface around process_input. The three inputs line up
# with its (audio_file, text_input, chat_history) parameters and the four
# outputs line up with the 4-tuple it returns.
interface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.Audio(type="filepath", label="Upload Audio or Record"),
        gr.Textbox(lines=2, placeholder="Or type text here", label="Text Input"),
        gr.State([])
    ],
    # NOTE(review): the hidden Audio/Textbox outputs below are separate
    # components from the visible inputs above, so the gr.update values
    # returned by process_input presumably do not clear the input widgets
    # themselves - confirm the intended behavior.
    outputs=[
        gr.Textbox(label="Chat History", lines=20),
        gr.Audio(visible=False),
        gr.Textbox(visible=False),
        gr.State()
    ],
    title="Chat with Llama 3.2-11B With Text or Voice (Whisper Large-v3)",
    description="Upload an audio file or type text to get a chat response based on the transcription.",
    allow_flagging='never'  # NOTE(review): disables flagging; the earlier claim that this "isolates sessions" is not established by this code - history is carried via gr.State
)

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    interface.launch()