Spaces:

Nymbo
/

Groq-Playground-Master

Build error

App Files Files Community

Groq-Playground-Master / app.py

Nymbo

Upload 4 files

cf27b9a verified about 1 year ago

raw

history blame contribute delete

17.3 kB

	import os
	import subprocess
	import random
	import numpy as np
	import json
	from datetime import timedelta
	import tempfile
	import gradio as gr
	from groq import Groq

	client = Groq(api_key=os.environ.get("Groq_Api_Key"))


	# llms

	MAX_SEED = np.iinfo(np.int32).max

	def update_max_tokens(model):
	if model in ["llama3-70b-8192", "llama3-8b-8192", "gemma-7b-it", "gemma2-9b-it"]:
	return gr.update(maximum=8192)
	elif model == "mixtral-8x7b-32768":
	return gr.update(maximum=32768)

	def create_history_messages(history):
	history_messages = [{"role": "user", "content": m[0]} for m in history]
	history_messages.extend([{"role": "assistant", "content": m[1]} for m in history])
	return history_messages

	def generate_response(prompt, history, model, temperature, max_tokens, top_p, seed):
	messages = create_history_messages(history)
	messages.append({"role": "user", "content": prompt})
	print(messages)

	if seed == 0:
	seed = random.randint(1, MAX_SEED)

	stream = client.chat.completions.create(
	messages=messages,
	model=model,
	temperature=temperature,
	max_tokens=max_tokens,
	top_p=top_p,
	seed=seed,
	stop=None,
	stream=True,
	)

	response = ""
	for chunk in stream:
	delta_content = chunk.choices[0].delta.content
	if delta_content is not None:
	response += delta_content
	yield response

	return response

	# speech to text

	ALLOWED_FILE_EXTENSIONS = ["mp3", "mp4", "mpeg", "mpga", "m4a", "wav", "webm"]
	MAX_FILE_SIZE_MB = 25

	LANGUAGE_CODES = {
	"English": "en",
	"Chinese": "zh",
	"German": "de",
	"Spanish": "es",
	"Russian": "ru",
	"Korean": "ko",
	"French": "fr",
	"Japanese": "ja",
	"Portuguese": "pt",
	"Turkish": "tr",
	"Polish": "pl",
	"Catalan": "ca",
	"Dutch": "nl",
	"Arabic": "ar",
	"Swedish": "sv",
	"Italian": "it",
	"Indonesian": "id",
	"Hindi": "hi",
	"Finnish": "fi",
	"Vietnamese": "vi",
	"Hebrew": "he",
	"Ukrainian": "uk",
	"Greek": "el",
	"Malay": "ms",
	"Czech": "cs",
	"Romanian": "ro",
	"Danish": "da",
	"Hungarian": "hu",
	"Tamil": "ta",
	"Norwegian": "no",
	"Thai": "th",
	"Urdu": "ur",
	"Croatian": "hr",
	"Bulgarian": "bg",
	"Lithuanian": "lt",
	"Latin": "la",
	"Māori": "mi",
	"Malayalam": "ml",
	"Welsh": "cy",
	"Slovak": "sk",
	"Telugu": "te",
	"Persian": "fa",
	"Latvian": "lv",
	"Bengali": "bn",
	"Serbian": "sr",
	"Azerbaijani": "az",
	"Slovenian": "sl",
	"Kannada": "kn",
	"Estonian": "et",
	"Macedonian": "mk",
	"Breton": "br",
	"Basque": "eu",
	"Icelandic": "is",
	"Armenian": "hy",
	"Nepali": "ne",
	"Mongolian": "mn",
	"Bosnian": "bs",
	"Kazakh": "kk",
	"Albanian": "sq",
	"Swahili": "sw",
	"Galician": "gl",
	"Marathi": "mr",
	"Panjabi": "pa",
	"Sinhala": "si",
	"Khmer": "km",
	"Shona": "sn",
	"Yoruba": "yo",
	"Somali": "so",
	"Afrikaans": "af",
	"Occitan": "oc",
	"Georgian": "ka",
	"Belarusian": "be",
	"Tajik": "tg",
	"Sindhi": "sd",
	"Gujarati": "gu",
	"Amharic": "am",
	"Yiddish": "yi",
	"Lao": "lo",
	"Uzbek": "uz",
	"Faroese": "fo",
	"Haitian": "ht",
	"Pashto": "ps",
	"Turkmen": "tk",
	"Norwegian Nynorsk": "nn",
	"Maltese": "mt",
	"Sanskrit": "sa",
	"Luxembourgish": "lb",
	"Burmese": "my",
	"Tibetan": "bo",
	"Tagalog": "tl",
	"Malagasy": "mg",
	"Assamese": "as",
	"Tatar": "tt",
	"Hawaiian": "haw",
	"Lingala": "ln",
	"Hausa": "ha",
	"Bashkir": "ba",
	"jw": "jw",
	"Sundanese": "su",
	}

	# Checks file extension, size, and downsamples if needed.
	def check_file(audio_file_path):
	if not audio_file_path:
	return None, gr.Error("Please upload an audio file.")

	file_size_mb = os.path.getsize(audio_file_path) / (1024 * 1024)
	file_extension = audio_file_path.split(".")[-1].lower()

	if file_extension not in ALLOWED_FILE_EXTENSIONS:
	return (
	None,
	gr.Error(
	f"Invalid file type (.{file_extension}). Allowed types: {', '.join(ALLOWED_FILE_EXTENSIONS)}"
	),
	)

	if file_size_mb > MAX_FILE_SIZE_MB:
	gr.Warning(
	f"File size too large ({file_size_mb:.2f} MB). Attempting to downsample to 16kHz. Maximum allowed: {MAX_FILE_SIZE_MB} MB"
	)

	output_file_path = os.path.splitext(audio_file_path)[0] + "_downsampled.wav"
	try:
	subprocess.run(
	[
	"ffmpeg",
	"-i",
	audio_file_path,
	"-ar",
	"16000",
	"-ac",
	"1",
	"-map",
	"0:a:",
	output_file_path,
	],
	check=True,
	)

	# Check size after downsampling
	downsampled_size_mb = os.path.getsize(output_file_path) / (1024 * 1024)
	if downsampled_size_mb > MAX_FILE_SIZE_MB:
	return (
	None,
	gr.Error(
	f"File size still too large after downsampling ({downsampled_size_mb:.2f} MB). Maximum allowed: {MAX_FILE_SIZE_MB} MB"
	),
	)

	return output_file_path, None
	except subprocess.CalledProcessError as e:
	return None, gr.Error(f"Error during downsampling: {e}")
	return audio_file_path, None


	def transcribe_audio(audio_file_path, prompt, language, auto_detect_language, model):
	# Check and process the file first
	processed_path, error_message = check_file(audio_file_path)

	# If there's an error during file check
	if error_message:
	return error_message

	with open(processed_path, "rb") as file:
	transcription = client.audio.transcriptions.create(
	file=(os.path.basename(processed_path), file.read()),
	model=model,
	prompt=prompt,
	response_format="text",
	language=None if auto_detect_language else language,
	temperature=0.0,
	)
	return transcription.text


	def translate_audio(audio_file_path, prompt, model):
	# Check and process the file first
	processed_path, error_message = check_file(audio_file_path)

	# If there's an error during file check
	if error_message:
	return error_message

	with open(processed_path, "rb") as file:
	translation = client.audio.translations.create(
	file=(os.path.basename(processed_path), file.read()),
	model=model,
	prompt=prompt,
	response_format="text",
	temperature=0.0,
	)
	return translation.text


	# subtitles maker

	def format_time(seconds):
	hours = int(seconds // 3600)
	minutes = int((seconds % 3600) // 60)
	seconds = int(seconds % 60)
	milliseconds = int((seconds % 1) * 1000)

	return f"{hours:02}:{minutes:02}:{seconds:02},{milliseconds:03}"

	def json_to_srt(transcription_json):
	srt_lines = []

	for segment in transcription_json:
	start_time = format_time(segment['start'])
	end_time = format_time(segment['end'])
	text = segment['text']

	srt_line = f"{segment['id']+1}\n{start_time} --> {end_time}\n{text}\n"
	srt_lines.append(srt_line)

	return '\n'.join(srt_lines)


	def generate_subtitles(audio_file_path, prompt, language, auto_detect_language, model):
	# Check and process the file first
	processed_path, error_message = check_file(audio_file_path)

	if error_message:
	return None, None, error_message

	with open(processed_path, "rb") as file:
	transcription_json_response = client.audio.transcriptions.create(
	file=(os.path.basename(processed_path), file.read()),
	model=model,
	prompt=prompt,
	response_format="verbose_json",
	language=None if auto_detect_language else language,
	temperature=0.0,
	)

	# Directly access the segments attribute
	transcription_json = transcription_json_response.segments

	try:
	srt_content = json_to_srt(transcription_json)
	except ValueError as e:
	return None, None, f"Error creating SRT file: {e}"

	with tempfile.NamedTemporaryFile(mode="w", suffix=".srt", delete=False) as temp_srt_file:
	temp_srt_path = temp_srt_file.name
	temp_srt_file.write(srt_content)

	if audio_file_path.lower().endswith((".mp4", ".webm")):
	try:
	output_file_path = audio_file_path.replace(os.path.splitext(audio_file_path)[1], "_with_subs" + os.path.splitext(audio_file_path)[1])
	subprocess.run(
	[
	"ffmpeg",
	"-i",
	audio_file_path,
	"-vf",
	f"subtitles={temp_srt_path}",
	output_file_path,
	],
	check=True,
	)
	return temp_srt_path, output_file_path, None
	except subprocess.CalledProcessError as e:
	return None, None, f"Error during subtitle addition: {e}"

	return temp_srt_path, None, None

	with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
	with gr.Tabs():
	with gr.TabItem("LLMs"):
	with gr.Row():
	with gr.Column(scale=1, min_width=250):
	model = gr.Dropdown(
	choices=[
	"llama3-70b-8192",
	"llama3-8b-8192",
	"mixtral-8x7b-32768",
	"gemma-7b-it",
	"gemma2-9b-it",
	],
	value="llama3-70b-8192",
	label="Model",
	)
	temperature = gr.Slider(
	minimum=0.0,
	maximum=1.0,
	step=0.01,
	value=0.5,
	label="Temperature",
	info="Controls diversity of the generated text. Lower is more deterministic, higher is more creative.",
	)
	max_tokens = gr.Slider(
	minimum=1,
	maximum=8192,
	step=1,
	value=4096,
	label="Max Tokens",
	info="The maximum number of tokens that the model can process in a single response.<br>Maximums: 8k for gemma 7b it, gemma2 9b it, llama 7b & 70b, 32k for mixtral 8x7b.",
	)
	top_p = gr.Slider(
	minimum=0.0,
	maximum=1.0,
	step=0.01,
	value=0.5,
	label="Top P",
	info="A method of text generation where a model will only consider the most probable next tokens that make up the probability p.",
	)
	seed = gr.Number(
	precision=0, value=0, label="Seed", info="A starting point to initiate generation, use 0 for random"
	)
	model.change(update_max_tokens, inputs=[model], outputs=max_tokens)
	with gr.Column(scale=1, min_width=400):
	chatbot = gr.ChatInterface(
	fn=generate_response,
	chatbot=None,
	additional_inputs=[
	model,
	temperature,
	max_tokens,
	top_p,
	seed,
	],
	)
	model.change(update_max_tokens, inputs=[model], outputs=max_tokens)
	with gr.TabItem("Speech To Text"):
	with gr.Tabs():
	with gr.TabItem("Transcription"):
	gr.Markdown("Transcript audio from files to text!")
	with gr.Row():
	audio_input = gr.File(
	type="filepath", label="Upload File containing Audio", file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS]
	)
	model_choice_transcribe = gr.Dropdown(
	choices=["whisper-large-v3"], # Only include 'whisper-large-v3'
	value="whisper-large-v3",
	label="Model",
	)
	with gr.Row():
	transcribe_prompt = gr.Textbox(
	label="Prompt (Optional)",
	info="Specify any context or spelling corrections.",
	)
	with gr.Column():
	language = gr.Dropdown(
	choices=[(lang, code) for lang, code in LANGUAGE_CODES.items()],
	value="en",
	label="Language",
	)
	auto_detect_language = gr.Checkbox(label="Auto Detect Language")
	transcribe_button = gr.Button("Transcribe")
	transcription_output = gr.Textbox(label="Transcription")
	transcribe_button.click(
	transcribe_audio,
	inputs=[audio_input, transcribe_prompt, language, auto_detect_language, model_choice_transcribe],
	outputs=transcription_output,
	)
	with gr.TabItem("Translation"):
	gr.Markdown("Transcript audio from files and translate them to English text!")
	with gr.Row():
	audio_input_translate = gr.File(
	type="filepath", label="Upload File containing Audio", file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS]
	)
	model_choice_translate = gr.Dropdown(
	choices=["whisper-large-v3"], # Only include 'whisper-large-v3'
	value="whisper-large-v3",
	label="Model",
	)
	with gr.Row():
	translate_prompt = gr.Textbox(
	label="Prompt (Optional)",
	info="Specify any context or spelling corrections.",
	)
	translate_button = gr.Button("Translate")
	translation_output = gr.Textbox(label="Translation")
	translate_button.click(
	translate_audio,
	inputs=[audio_input_translate, translate_prompt, model_choice_translate],
	outputs=translation_output,
	)
	with gr.TabItem("Subtitle Maker"):
	with gr.Row():
	audio_input_subtitles = gr.File(
	label="Upload Audio/Video",
	file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS],
	)
	model_choice_subtitles = gr.Dropdown(
	choices=["whisper-large-v3"], # Only include 'whisper-large-v3'
	value="whisper-large-v3",
	label="Model",
	)
	transcribe_prompt_subtitles = gr.Textbox(
	label="Prompt (Optional)",
	info="Specify any context or spelling corrections.",
	)
	with gr.Row():
	language_subtitles = gr.Dropdown(
	choices=[(lang, code) for lang, code in LANGUAGE_CODES.items()],
	value="en",
	label="Language",
	)
	auto_detect_language_subtitles = gr.Checkbox(
	label="Auto Detect Language"
	)
	transcribe_button_subtitles = gr.Button("Generate Subtitles")
	srt_output = gr.File(label="SRT Output File")
	video_output = gr.File(label="Output Video with Subtitles")
	transcribe_button_subtitles.click(
	generate_subtitles,
	inputs=[
	audio_input_subtitles,
	transcribe_prompt_subtitles,
	language_subtitles,
	auto_detect_language_subtitles,
	model_choice_subtitles,
	],
	outputs=[srt_output, video_output, gr.Textbox(label="Error")],
	)

	demo.launch()