# app.py — openfree, commit 3cf27d9 (verified)
#!/usr/bin/env python
import os
import re
import json
import requests
from collections.abc import Iterator
from threading import Thread
import gradio as gr
from loguru import logger
import pandas as pd
import PyPDF2
##############################################################################
# API Configuration
##############################################################################
# Friendli AI inference token — required; fail fast at import time if absent.
FRIENDLI_TOKEN = os.environ.get("FRIENDLI_TOKEN")
if not FRIENDLI_TOKEN:
    raise ValueError("Please set FRIENDLI_TOKEN environment variable")
# Dedicated-endpoint deployment id and its chat-completions URL.
FRIENDLI_MODEL_ID = "dep89a2fld32mcm"
FRIENDLI_API_URL = "https://api.friendli.ai/dedicated/v1/chat/completions"
# SERPHouse API key — optional; without it web search degrades to a
# "not configured" message instead of failing (see do_web_search).
SERPHOUSE_API_KEY = os.getenv("SERPHOUSE_API_KEY", "")
if not SERPHOUSE_API_KEY:
    logger.warning("SERPHOUSE_API_KEY not set. Web search functionality will be limited.")
##############################################################################
# File Processing Constants
##############################################################################
MAX_FILE_SIZE = 30 * 1024 * 1024  # 30MB upload cap, enforced by validate_file_size
MAX_CONTENT_CHARS = 6000  # max characters of file content injected into the prompt
##############################################################################
# Improved Keyword Extraction
##############################################################################
def extract_keywords(text: str, top_k: int = 5) -> str:
    """Extract up to *top_k* keywords from *text* (English and Korean).

    Strips punctuation, drops common stop words (English function words and
    Korean postpositions) plus single-character tokens, and returns the
    surviving tokens joined by spaces — used to build a web-search query
    from free-form user input.
    """
    # NOTE: the original stop words ('λŠ”', 'κ°€', ...) and the regex range
    # 'κ°€-힣' were mojibake and could never match real Korean text;
    # restored to the intended particles and the Hangul syllable range.
    stop_words = {
        '은', '는', '이', '가', '을', '를', '의', '에', '에서',
        'the', 'is', 'at', 'on', 'in', 'a', 'an', 'and', 'or', 'but',
    }
    # Keep only ASCII alphanumerics, Hangul syllables and whitespace.
    text = re.sub(r"[^a-zA-Z0-9가-힣\s]", "", text)
    tokens = text.split()
    key_tokens = [
        token for token in tokens
        if token.lower() not in stop_words and len(token) > 1
    ][:top_k]
    return " ".join(key_tokens)
##############################################################################
# File Size Validation
##############################################################################
def validate_file_size(file_path: str) -> bool:
    """Return True iff *file_path* exists and is at most MAX_FILE_SIZE bytes.

    Returns False rather than raising, so callers can treat any stat
    failure (missing file, permission error) as "reject the upload".
    """
    try:
        # os.path.getsize raises OSError for missing/inaccessible paths.
        # The previous bare `except:` also swallowed KeyboardInterrupt and
        # SystemExit; narrow the handler to what getsize can actually raise.
        return os.path.getsize(file_path) <= MAX_FILE_SIZE
    except OSError:
        return False
##############################################################################
# Web Search Function
##############################################################################
def do_web_search(query: str, use_korean: bool = False) -> str:
    """Run *query* through the SerpHouse live-SERP API and return the top
    organic results formatted as Markdown.

    Returns a human-readable error string (never raises) when the API key
    is missing, the request fails, or the response shape is unexpected.
    """
    if not SERPHOUSE_API_KEY:
        return "Web search unavailable. API key not configured."
    try:
        url = "https://api.serphouse.com/serp/live"
        params = {
            "q": query,
            "domain": "google.com",
            "serp_type": "web",
            "device": "desktop",
            # SerpHouse result language; Korean only when requested.
            "lang": "ko" if use_korean else "en",
            "num": "20"
        }
        headers = {
            "Authorization": f"Bearer {SERPHOUSE_API_KEY}"
        }
        logger.info(f"Calling SerpHouse API... Query: {query}")
        response = requests.get(url, headers=headers, params=params, timeout=30)
        response.raise_for_status()
        data = response.json()
        # The organic list has been observed under several response shapes;
        # probe them in order: results.organic, results.results.organic,
        # then top-level data.organic.
        results = data.get("results", {})
        organic = None
        if isinstance(results, dict) and "organic" in results:
            organic = results["organic"]
        elif isinstance(results, dict) and "results" in results:
            if isinstance(results["results"], dict) and "organic" in results["results"]:
                organic = results["results"]["organic"]
        elif "organic" in data:
            organic = data["organic"]
        if not organic:
            return "No search results found or unexpected API response structure."
        # Cap at 20 entries regardless of what the API returned.
        max_results = min(20, len(organic))
        limited_organic = organic[:max_results]
        summary_lines = []
        for idx, item in enumerate(limited_organic, start=1):
            title = item.get("title", "No title")
            link = item.get("link", "#")
            snippet = item.get("snippet", "No description")
            displayed_link = item.get("displayed_link", link)
            summary_lines.append(
                f"### Result {idx}: {title}\n\n"
                f"{snippet}\n\n"
                f"**Source**: [{displayed_link}]({link})\n\n"
                f"---\n"
            )
        # Preamble instructing the model to cite these results.
        instructions = """
# Web Search Results
Below are the search results. Use this information when answering questions:
1. Reference the title, content, and source links
2. Explicitly cite sources in your answer (e.g., "According to source X...")
3. Include actual source links in your response
4. Synthesize information from multiple sources
"""
        search_results = instructions + "\n".join(summary_lines)
        return search_results
    except requests.exceptions.Timeout:
        logger.error("Web search timeout")
        return "Web search timed out. Please try again."
    except requests.exceptions.RequestException as e:
        logger.error(f"Web search network error: {e}")
        return "Network error during web search."
    except Exception as e:
        # Last-resort catch so a search failure never crashes the chat turn.
        logger.error(f"Web search failed: {e}")
        return f"Web search failed: {str(e)}"
##############################################################################
# File Analysis Functions
##############################################################################
def analyze_csv_file(path: str) -> str:
    """Summarize a CSV file as Markdown: dimensions, columns, row preview.

    Tries several encodings (Korean files are often cp949/euc-kr), previews
    at most 50 rows and 10 columns, and truncates the rendered table to
    MAX_CONTENT_CHARS. Returns an error string instead of raising.
    """
    if not validate_file_size(path):
        return f"⚠️ Error: File size exceeds {MAX_FILE_SIZE/1024/1024:.1f}MB limit."
    try:
        encodings = ['utf-8', 'cp949', 'euc-kr', 'latin-1']
        df = None
        used_encoding = None
        for encoding in encodings:
            try:
                df = pd.read_csv(path, encoding=encoding, nrows=50)
                # Remember which encoding worked instead of relying on the
                # leaked loop variable for the row-count re-read below.
                used_encoding = encoding
                break
            except UnicodeDecodeError:
                continue
        if df is None:
            return "Failed to read CSV: Unsupported encoding"
        # Count all rows cheaply by re-reading only the first column.
        total_rows = len(pd.read_csv(path, encoding=used_encoding, usecols=[0]))
        # Capture the real column count BEFORE truncating the preview —
        # the original reported at most 10 columns even for wider files.
        total_cols = df.shape[1]
        if total_cols > 10:
            df = df.iloc[:, :10]
        summary = f"**Data size**: {total_rows} rows x {total_cols} columns\n"
        summary += f"**Showing**: Top {min(50, total_rows)} rows\n"
        summary += f"**Columns**: {', '.join(df.columns)}\n\n"
        df_str = df.to_string()
        if len(df_str) > MAX_CONTENT_CHARS:
            df_str = df_str[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
        return f"**[CSV File: {os.path.basename(path)}]**\n\n{summary}{df_str}"
    except Exception as e:
        logger.error(f"CSV read error: {e}")
        return f"Failed to read CSV file ({os.path.basename(path)}): {str(e)}"
def analyze_txt_file(path: str) -> str:
    """Render a text file as Markdown, probing a list of common encodings.

    The first encoding that decodes the whole file wins; content is capped
    at MAX_CONTENT_CHARS characters. Returns an error string on failure.
    """
    if not validate_file_size(path):
        return f"⚠️ Error: File size exceeds {MAX_FILE_SIZE/1024/1024:.1f}MB limit."
    for codec in ('utf-8', 'cp949', 'euc-kr', 'latin-1', 'utf-16'):
        try:
            with open(path, "r", encoding=codec) as handle:
                body = handle.read()
        except UnicodeDecodeError:
            # Wrong encoding — try the next candidate.
            continue
        size_info = f"**File size**: {os.path.getsize(path)/1024:.1f}KB\n\n"
        if len(body) > MAX_CONTENT_CHARS:
            body = body[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
        return f"**[TXT File: {os.path.basename(path)}]**\n\n{size_info}{body}"
    return f"Failed to read text file ({os.path.basename(path)}): Unsupported encoding"
def pdf_to_markdown(pdf_path: str) -> str:
    """Convert the first pages of a PDF to Markdown text.

    Reads at most 5 pages with PyPDF2, dividing the MAX_CONTENT_CHARS
    budget evenly across them; per-page extraction failures are reported
    inline rather than aborting the whole file. Returns an error string
    on failure instead of raising.
    """
    if not validate_file_size(pdf_path):
        return f"⚠️ Error: File size exceeds {MAX_FILE_SIZE/1024/1024:.1f}MB limit."
    text_chunks = []
    try:
        with open(pdf_path, "rb") as f:
            reader = PyPDF2.PdfReader(f)
            total_pages = len(reader.pages)
            max_pages = min(5, total_pages)
            text_chunks.append(f"**Total pages**: {total_pages}")
            text_chunks.append(f"**Showing**: First {max_pages} pages\n")
            for page_num in range(max_pages):
                try:
                    page = reader.pages[page_num]
                    # extract_text() may return None (e.g. image-only pages).
                    page_text = page.extract_text() or ""
                    page_text = page_text.strip()
                    if page_text:
                        # Split the character budget evenly across pages.
                        if len(page_text) > MAX_CONTENT_CHARS // max_pages:
                            page_text = page_text[:MAX_CONTENT_CHARS // max_pages] + "...(truncated)"
                        text_chunks.append(f"## Page {page_num+1}\n\n{page_text}\n")
                except Exception as e:
                    # Keep going: one unreadable page shouldn't lose the rest.
                    text_chunks.append(f"## Page {page_num+1}\n\nFailed to read page: {str(e)}\n")
            if total_pages > max_pages:
                text_chunks.append(f"\n...({max_pages}/{total_pages} pages shown)...")
    except Exception as e:
        logger.error(f"PDF read error: {e}")
        return f"Failed to read PDF file ({os.path.basename(pdf_path)}): {str(e)}"
    full_text = "\n".join(text_chunks)
    # Final safety cap on the combined output length.
    if len(full_text) > MAX_CONTENT_CHARS:
        full_text = full_text[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
    return f"**[PDF File: {os.path.basename(pdf_path)}]**\n\n{full_text}"
##############################################################################
# File Type Check Functions
##############################################################################
def is_image_file(file_path: str) -> bool:
    """Return True when *file_path* has an image extension (case-insensitive)."""
    return file_path.lower().endswith((".png", ".jpg", ".jpeg", ".gif", ".webp"))
def is_video_file(file_path: str) -> bool:
    """Return True when *file_path* has a video extension (case-insensitive)."""
    return file_path.lower().endswith((".mp4", ".avi", ".mov", ".mkv"))
def is_document_file(file_path: str) -> bool:
    """Return True when *file_path* is a supported document type (pdf/csv/txt)."""
    return file_path.lower().endswith((".pdf", ".csv", ".txt"))
##############################################################################
# Message Processing Functions
##############################################################################
def process_new_user_message(message: dict) -> str:
    """Flatten a Gradio multimodal user message into a single text prompt.

    *message* carries a "text" field and an optional "files" list.
    Document files (CSV/TXT/PDF) are analyzed and their summaries appended
    to the text; image/video/unknown uploads produce warning notes.

    Fix: read the text via .get() so a files-only message with no "text"
    key no longer raises KeyError (the rest of the file already uses
    message.get("text", "")).
    """
    text = message.get("text", "")
    files = message.get("files")
    if not files:
        return text
    content_parts = [text]
    # Bucket the uploads by type so related analyses stay grouped.
    csv_files = []
    txt_files = []
    pdf_files = []
    image_files = []
    video_files = []
    unknown_files = []
    for file_path in files:
        lower = file_path.lower()
        if lower.endswith(".csv"):
            csv_files.append(file_path)
        elif lower.endswith(".txt"):
            txt_files.append(file_path)
        elif lower.endswith(".pdf"):
            pdf_files.append(file_path)
        elif is_image_file(file_path):
            image_files.append(file_path)
        elif is_video_file(file_path):
            video_files.append(file_path)
        else:
            unknown_files.append(file_path)
    # Analyze document files and append the summaries to the prompt.
    for csv_path in csv_files:
        content_parts.append(analyze_csv_file(csv_path))
    for txt_path in txt_files:
        content_parts.append(analyze_txt_file(txt_path))
    for pdf_path in pdf_files:
        content_parts.append(pdf_to_markdown(pdf_path))
    # Unsupported media: warn explicitly instead of silently ignoring.
    if image_files:
        image_names = [os.path.basename(f) for f in image_files]
        content_parts.append(
            f"\n⚠️ **Image files detected**: {', '.join(image_names)}\n"
            "This demo currently does not support image analysis. "
            "Please describe the image content in text if you need help with it."
        )
    if video_files:
        video_names = [os.path.basename(f) for f in video_files]
        content_parts.append(
            f"\n⚠️ **Video files detected**: {', '.join(video_names)}\n"
            "This demo currently does not support video analysis. "
            "Please describe the video content in text if you need help with it."
        )
    if unknown_files:
        unknown_names = [os.path.basename(f) for f in unknown_files]
        content_parts.append(
            f"\n⚠️ **Unsupported file format**: {', '.join(unknown_names)}\n"
            "Supported formats: PDF, CSV, TXT"
        )
    return "\n\n".join(content_parts)
def process_history(history: list[dict]) -> list[dict]:
    """Translate Gradio "messages"-style history into Friendli chat messages.

    Assistant turns pass through unchanged. User turns may carry either a
    plain string or a list of file paths; file lists are collapsed into a
    "[File: name]" placeholder string (dropped entirely if empty).
    """
    messages = []
    for entry in history:
        role = entry["role"]
        content = entry["content"]
        if role == "assistant":
            messages.append({"role": "assistant", "content": content})
        elif isinstance(content, str):
            messages.append({"role": "user", "content": content})
        elif isinstance(content, list) and content:
            # Represent attached files by name only — the API gets text.
            placeholders = [
                f"[File: {os.path.basename(p)}]"
                for p in content
                if isinstance(p, str)
            ]
            if placeholders:
                messages.append({"role": "user", "content": " ".join(placeholders)})
    return messages
##############################################################################
# Streaming Response Handler
##############################################################################
def stream_friendli_response(messages: list[dict], max_tokens: int = 1000) -> Iterator[str]:
    """Stream a chat completion from the Friendli AI dedicated endpoint.

    Yields the *cumulative* response text after each received chunk
    (Gradio replaces the displayed message on every yield). API/network
    failures are yielded as user-visible warning strings, never raised.
    """
    headers = {
        "Authorization": f"Bearer {FRIENDLI_TOKEN}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": FRIENDLI_MODEL_ID,
        "messages": messages,
        "max_tokens": max_tokens,
        "top_p": 0.8,
        "temperature": 0.7,
        "stream": True,
        "stream_options": {
            "include_usage": True
        }
    }
    try:
        response = requests.post(
            FRIENDLI_API_URL,
            headers=headers,
            json=payload,
            stream=True,
            timeout=60
        )
        response.raise_for_status()
        full_response = ""
        # Server-sent events: each "data: " line carries one JSON delta.
        for line in response.iter_lines():
            if line:
                line_text = line.decode('utf-8')
                if line_text.startswith("data: "):
                    data_str = line_text[6:]  # strip the "data: " prefix
                    if data_str == "[DONE]":
                        break
                    try:
                        data = json.loads(data_str)
                        if "choices" in data and len(data["choices"]) > 0:
                            delta = data["choices"][0].get("delta", {})
                            content = delta.get("content", "")
                            if content:
                                full_response += content
                                yield full_response
                    except json.JSONDecodeError:
                        # Skip malformed SSE chunks rather than aborting.
                        logger.warning(f"JSON parsing failed: {data_str}")
                        continue
    except requests.exceptions.Timeout:
        yield "⚠️ Response timeout. Please try again."
    except requests.exceptions.RequestException as e:
        logger.error(f"Friendli API network error: {e}")
        yield f"⚠️ Network error occurred: {str(e)}"
    except Exception as e:
        logger.error(f"Friendli API error: {str(e)}")
        yield f"⚠️ API call error: {str(e)}"
##############################################################################
# Main Inference Function
##############################################################################
def run(
    message: dict,
    history: list[dict],
    max_new_tokens: int = 512,
    use_web_search: bool = False,
    use_korean: bool = False,
    system_prompt: str = "",
) -> Iterator[str]:
    """Top-level chat handler: build the message list and stream the reply.

    Yields progressively longer response strings (Gradio streaming
    convention). Never raises — errors are yielded as warning text.

    Fixes: (1) the Korean system-prompt strings were mojibake ("λ„ˆλŠ” ...")
    and sent garbage to the model — restored to readable Korean;
    (2) the search-error sentinel only checked the "Web search" prefix and
    would inject the "No search results..." / "Network error..." failure
    strings into the prompt as if they were results.
    """
    try:
        messages = []
        # Base system prompt in the user's requested language.
        if use_korean:
            combined_system_msg = "너는 AI 어시스턴트 역할이다. 한국어로 친절하고 정확하게 답변해라."
        else:
            combined_system_msg = "You are an AI assistant. Please respond helpfully and accurately in English."
        if system_prompt.strip():
            combined_system_msg += f"\n\n{system_prompt.strip()}"
        # Optional web search: distill the user text into keywords, search,
        # and splice genuine results into the system prompt.
        if use_web_search:
            user_text = message.get("text", "")
            if user_text:
                ws_query = extract_keywords(user_text, top_k=5)
                if ws_query.strip():
                    logger.info(f"[Auto web search keywords] {ws_query!r}")
                    ws_result = do_web_search(ws_query, use_korean=use_korean)
                    # do_web_search signals failure via fixed message
                    # prefixes; skip injecting any of them.
                    failure_prefixes = ("Web search", "No search results", "Network error")
                    if not ws_result.startswith(failure_prefixes):
                        combined_system_msg += f"\n\n[Search Results]\n{ws_result}"
                        if use_korean:
                            combined_system_msg += "\n\n[중요: 답변에 검색 결과의 출처를 반드시 인용하세요]"
                        else:
                            combined_system_msg += "\n\n[Important: Always cite sources from search results in your answer]"
        messages.append({
            "role": "system",
            "content": combined_system_msg
        })
        # Prior turns, then the current (possibly multimodal) message.
        messages.extend(process_history(history))
        user_content = process_new_user_message(message)
        messages.append({
            "role": "user",
            "content": user_content
        })
        logger.debug(f"Total messages: {len(messages)}")
        # Stream cumulative response text straight through to the UI.
        for response_text in stream_friendli_response(messages, max_new_tokens):
            yield response_text
    except Exception as e:
        logger.error(f"run function error: {str(e)}")
        yield f"⚠️ Sorry, an error occurred: {str(e)}"
##############################################################################
# Examples
##############################################################################
# Clickable example prompts for the chat UI. Each entry is a one-element
# list holding a multimodal message dict; the file paths are relative to
# the app repository and are not validated at import time.
examples = [
    # PDF comparison example
    [
        {
            "text": "Compare the contents of the two PDF files.",
            "files": [
                "assets/additional-examples/before.pdf",
                "assets/additional-examples/after.pdf",
            ],
        }
    ],
    # CSV analysis example
    [
        {
            "text": "Summarize and analyze the contents of the CSV file.",
            "files": ["assets/additional-examples/sample-csv.csv"],
        }
    ],
    # Web search example
    [
        {
            "text": "Explain discord.gg/openfreeai",
            "files": [],
        }
    ],
    # Code generation example
    [
        {
            "text": "Write Python code to generate Fibonacci sequence.",
            "files": [],
        }
    ],
]
##############################################################################
# Gradio UI - CSS Styles (Removed blue colors)
##############################################################################
# Custom stylesheet passed to gr.Blocks: neutral gray palette,
# full-width layout, and restyled example/chat/input widgets.
css: str = """
/* Full width UI */
.gradio-container {
background: rgba(255, 255, 255, 0.95);
padding: 30px 40px;
margin: 20px auto;
width: 100% !important;
max-width: none !important;
border-radius: 12px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
.fillable {
width: 100% !important;
max-width: 100% !important;
}
/* Background */
body {
background: linear-gradient(135deg, #f5f7fa 0%, #e0e0e0 100%);
margin: 0;
padding: 0;
font-family: 'Segoe UI', 'Helvetica Neue', Arial, sans-serif;
color: #333;
}
/* Button styles - neutral gray */
button, .btn {
background: #6b7280 !important;
border: none;
color: white !important;
padding: 10px 20px;
text-transform: uppercase;
font-weight: 600;
letter-spacing: 0.5px;
cursor: pointer;
border-radius: 6px;
transition: all 0.3s ease;
}
button:hover, .btn:hover {
background: #4b5563 !important;
transform: translateY(-1px);
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
}
/* Examples section */
#examples_container, .examples-container {
margin: 20px auto;
width: 90%;
background: rgba(255, 255, 255, 0.8);
padding: 20px;
border-radius: 8px;
}
#examples_row, .examples-row {
justify-content: center;
}
/* Example buttons */
.gr-samples-table button,
.gr-examples button,
.examples button {
background: #f0f2f5 !important;
border: 1px solid #d1d5db;
color: #374151 !important;
margin: 5px;
font-size: 14px;
}
.gr-samples-table button:hover,
.gr-examples button:hover,
.examples button:hover {
background: #e5e7eb !important;
border-color: #9ca3af;
}
/* Chat interface */
.chatbox, .chatbot {
background: white !important;
border-radius: 8px;
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
}
.message {
padding: 15px;
margin: 10px 0;
border-radius: 8px;
}
/* Input styles */
.multimodal-textbox, textarea, input[type="text"] {
background: white !important;
border: 1px solid #d1d5db;
border-radius: 6px;
padding: 10px;
font-size: 16px;
}
.multimodal-textbox:focus, textarea:focus, input[type="text"]:focus {
border-color: #6b7280;
outline: none;
box-shadow: 0 0 0 3px rgba(107, 114, 128, 0.1);
}
/* Warning messages */
.warning-box {
background: #fef3c7 !important;
border: 1px solid #f59e0b;
border-radius: 8px;
padding: 15px;
margin: 10px 0;
color: #92400e;
}
/* Headings */
h1, h2, h3 {
color: #1f2937;
}
/* Links - neutral gray */
a {
color: #6b7280;
text-decoration: none;
}
a:hover {
text-decoration: underline;
color: #4b5563;
}
/* Slider */
.gr-slider {
margin: 15px 0;
}
/* Checkbox */
input[type="checkbox"] {
width: 18px;
height: 18px;
margin-right: 8px;
}
/* Scrollbar */
::-webkit-scrollbar {
width: 8px;
height: 8px;
}
::-webkit-scrollbar-track {
background: #f1f1f1;
}
::-webkit-scrollbar-thumb {
background: #888;
border-radius: 4px;
}
::-webkit-scrollbar-thumb:hover {
background: #555;
}
"""
##############################################################################
# Gradio UI Main
##############################################################################
# Gradio UI assembly. Fix: the emoji/Korean widget labels were mojibake
# ("πŸ€—", "πŸ”", "πŸ‡°πŸ‡· ν•œκΈ€"); restored to the intended characters.
with gr.Blocks(css=css, title="Gemma-3-R1984-27B Chatbot") as demo:
    # Title / community link
    gr.Markdown("# 🤗 Gemma-3-R1984-27B Chatbot")
    gr.Markdown("Community: [https://discord.gg/openfreeai](https://discord.gg/openfreeai)")
    # Options row: web search toggle, Korean toggle, response length.
    with gr.Row():
        with gr.Column(scale=2):
            web_search_checkbox = gr.Checkbox(
                label="🔍 Enable Deep Research (Web Search)",
                value=False,
                info="Check for questions requiring latest information"
            )
        with gr.Column(scale=1):
            korean_checkbox = gr.Checkbox(
                label="🇰🇷 한글 (Korean)",
                value=False,
                info="Check for Korean responses"
            )
        with gr.Column(scale=1):
            max_tokens_slider = gr.Slider(
                label="Max Tokens",
                minimum=100,
                maximum=8000,
                step=50,
                value=1000,
                info="Adjust response length"
            )
    # Main chat interface. additional_inputs order must match run()'s
    # (max_new_tokens, use_web_search, use_korean) parameter order.
    chat = gr.ChatInterface(
        fn=run,
        type="messages",
        chatbot=gr.Chatbot(type="messages", scale=1),
        textbox=gr.MultimodalTextbox(
            file_types=[
                ".webp", ".png", ".jpg", ".jpeg", ".gif",
                ".mp4", ".csv", ".txt", ".pdf"
            ],
            file_count="multiple",
            autofocus=True,
            placeholder="Enter text or upload PDF, CSV, TXT files. (Images/videos not supported in this demo)"
        ),
        multimodal=True,
        additional_inputs=[
            max_tokens_slider,
            web_search_checkbox,
            korean_checkbox,
        ],
        stop_btn=False,
        examples=examples,
        run_examples_on_click=False,
        cache_examples=False,
        delete_cache=(1800, 1800),
    )

if __name__ == "__main__":
    demo.launch()