import gradio as gr
import requests
import json
import os
import threading
import queue
import time

# Load all configuration from environment variables
TOGETHER_API_KEY = os.environ.get('TOGETHER_API_KEY', '')
TOGETHER_API_URL = os.environ.get('TOGETHER_API_URL', 'https://api.together.xyz/v1/chat/completions')
MODEL_A_NAME = os.environ.get('MODEL_A_NAME', '')
MODEL_B_NAME = os.environ.get('MODEL_B_NAME', '')
MODEL_C_NAME = os.environ.get('MODEL_C_NAME', '')

# Display names for the UI
MODEL_A_DISPLAY = os.environ.get('MODEL_A_DISPLAY', '')
MODEL_B_DISPLAY = os.environ.get('MODEL_B_DISPLAY', '')
MODEL_C_DISPLAY = os.environ.get('MODEL_C_DISPLAY', '')

# Headers for API calls
HEADERS = {
    "Authorization": f"Bearer {TOGETHER_API_KEY}",
    "Content-Type": "application/json"
}

SYSTEM_PROMPT = os.environ.get('SYSTEM_PROMPT', """You are an expert conversationalist who responds to the best of your ability. The assistant is Palmyra, created by Writer.""")

MODELS = {
    "Model A": MODEL_A_NAME,
    "Model B": MODEL_B_NAME,
    "Model C": MODEL_C_NAME
}
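
# Example configuration (illustrative values only; the real settings are
# supplied as environment variables or Space secrets, and the model ids
# below are placeholders, not the Space's actual choices):
#   export TOGETHER_API_KEY="..."
#   export MODEL_A_NAME="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo"
#   export MODEL_A_DISPLAY="Llama 3.1 70B"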

def stream_together_model(model_name, user_prompt, add_thinking_delay=False):
    # Optionally show a transient placeholder, then clear it before real tokens arrive.
    if add_thinking_delay:
        yield "🤔 Thinking..."
        time.sleep(1)
        yield ""
    body = {
        "model": model_name,
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt}
        ],
        "stream": True
    }
    try:
        with requests.post(TOGETHER_API_URL, headers=HEADERS, json=body, stream=True, timeout=120) as response:
            response.raise_for_status()
            for line in response.iter_lines():
                if not line:
                    continue
                payload = line.decode("utf-8").removeprefix("data: ")
                if payload.strip() == "[DONE]":  # SSE end-of-stream sentinel
                    break
                try:
                    data = json.loads(payload)
                except json.JSONDecodeError:
                    continue
                content = data.get("choices", [{}])[0].get("delta", {}).get("content", "")
                if content:
                    yield content
    except Exception as e:
        yield f"[Error: {e}]"

def stream_model_c(user_prompt):
    # Alternative path for Model C against a self-hosted, OpenAI-compatible
    # endpoint. This helper is not wired into the UI below (the third chatbot
    # streams through the Together API like the others). It is also
    # non-streaming: the full completion is yielded in a single chunk.
    url = "http://192.222.54.94:8000/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    body = {
        "model": "palmyra-x5-v2",
        "messages": [
            {"role": "user", "content": user_prompt}
        ],
        "temperature": 0.07
    }
    try:
        response = requests.post(url, headers=headers, json=body, timeout=120)
        response.raise_for_status()
        data = response.json()
        content = data.get("choices", [{}])[0].get("message", {}).get("content", "")
        yield content
    except Exception as e:
        yield f"[Error: {e}]"

custom_css = """... (unchanged CSS, keep same) ..."""

with gr.Blocks(css=custom_css, theme=gr.themes.Base()) as demo:
    gr.HTML("""
    <div class="container">
        <h1>Palmyra-x5</h1>
    </div>
    """)
    with gr.Row():
        chatbot_a = gr.Chatbot(label=MODEL_A_DISPLAY, height=500, bubble_full_width=False)
        chatbot_b = gr.Chatbot(label=MODEL_B_DISPLAY, height=500, bubble_full_width=False)
        chatbot_c = gr.Chatbot(label=MODEL_C_DISPLAY, height=500, bubble_full_width=False)
    with gr.Row():
        user_input = gr.Textbox(placeholder="Type your message...", show_label=False, scale=8)
        thinking_toggle = gr.Checkbox(label="Show Thinking Process", value=True, scale=2)
        submit_btn = gr.Button("Send", scale=1, variant="primary")
    gr.Examples(
        examples=[
            "What does Tencent do?",
            "Explain quantum computing",
            "Write a haiku about AI",
            "Compare Python vs JavaScript",
            "Tips for better sleep"
        ],
        inputs=user_input,
        label="Try these examples:"
    )

    def stream_all_models(message, enable_thinking, hist_a, hist_b, hist_c):
        if not message.strip():
            # This function is a generator, so bail out with a yield plus a bare
            # return: a `return value` inside a generator never reaches Gradio.
            yield hist_a, hist_b, hist_c, ""
            return
        hist_a = hist_a + [[message, ""]]
        hist_b = hist_b + [[message, ""]]
        hist_c = hist_c + [[message, ""]]
        yield hist_a, hist_b, hist_c, ""
        q1, q2, q3 = queue.Queue(), queue.Queue(), queue.Queue()

        def fetch_stream(q, model, add_delay=False):
            try:
                for chunk in stream_together_model(model, message, add_delay):
                    q.put(chunk)
            finally:
                q.put(None)  # sentinel: this model's stream has finished

        def fetch_stream_c(q, message):
            # Unused alternative that would route Model C through the
            # self-hosted endpoint via stream_model_c; kept for that deployment.
            try:
                for chunk in stream_model_c(message):
                    q.put(chunk)
            finally:
                q.put(None)

        # One producer thread per model. daemon=True keeps a hung request from
        # blocking interpreter shutdown; the "Show Thinking Process" toggle
        # controls whether the placeholder is shown.
        threading.Thread(target=fetch_stream, args=(q1, MODELS["Model A"], enable_thinking), daemon=True).start()
        threading.Thread(target=fetch_stream, args=(q2, MODELS["Model B"], enable_thinking), daemon=True).start()
        threading.Thread(target=fetch_stream, args=(q3, MODELS["Model C"], enable_thinking), daemon=True).start()

        def drain(q, hist):
            # Pull at most one chunk from a queue; returns (done, updated).
            try:
                chunk = q.get(timeout=0.05)
            except queue.Empty:
                return False, False
            if chunk is None:
                return True, False
            if chunk == "":
                hist[-1][1] = ""      # clear the thinking placeholder
            elif chunk.startswith("🤔"):
                hist[-1][1] = chunk   # show the placeholder verbatim
            else:
                hist[-1][1] += chunk  # append streamed tokens
            return False, True

        done_a = done_b = done_c = False
        while not (done_a and done_b and done_c):
            updated = False
            if not done_a:
                done_a, u = drain(q1, hist_a)
                updated = updated or u
            if not done_b:
                done_b, u = drain(q2, hist_b)
                updated = updated or u
            if not done_c:
                done_c, u = drain(q3, hist_c)
                updated = updated or u
            if updated:
                yield hist_a, hist_b, hist_c, ""

    submit_btn.click(
        stream_all_models,
        [user_input, thinking_toggle, chatbot_a, chatbot_b, chatbot_c],
        [chatbot_a, chatbot_b, chatbot_c, user_input]
    )
    user_input.submit(
        stream_all_models,
        [user_input, thinking_toggle, chatbot_a, chatbot_b, chatbot_c],
        [chatbot_a, chatbot_b, chatbot_c, user_input]
    )

if __name__ == "__main__":
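    # Sketch of an optional step: on Gradio 3.x, generator-based handlers need
    # the request queue enabled for incremental updates, so uncomment the next
    # line if partial output never renders (on 4.x the queue is on by default).
    # demo.queue()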
    demo.launch()