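"""🎭 Avatar Generator - Chinese Portrait.

Gradio app that turns "Chinese portrait" answers (form mode or chat mode)
into a personalized avatar rendered with FLUX.1-schnell, and exposes its
tools to external clients over the MCP protocol.
"""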
import gradio as gr
import torch
from diffusers import DiffusionPipeline
import random
import os
import sys
import time
# Set PyTorch MPS fallback for Apple Silicon compatibility
os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'
# Check for dev mode
DEV_MODE = "--dev" in sys.argv
# Import spaces for HuggingFace deployment
try:
    import spaces
    HF_SPACES = True
    print("🚀 Running on HuggingFace Spaces with ZeroGPU")
    # Optimize for ZeroGPU performance
    torch.set_float32_matmul_precision('high')  # Enable TensorFloat32 for better performance
    torch.backends.cudnn.allow_tf32 = True  # Enable TF32 on cuDNN
except ImportError:
    HF_SPACES = False
    print("🏠 Running locally - spaces module not available")
# MCP is always enabled
print("🔌 MCP protocol enabled - tools available for external access")
MAX_SEED = 2**32 - 1

# Categories for the chatbot (100+ varied categories)
CHAT_CATEGORIES = [
    # Animals and creatures
    "animal", "bird", "sea creature", "insect", "mythical creature", "prehistoric creature",
    # Colors and shades
    "color", "shade", "metallic color", "gemstone color",
    # Objects and artifacts
    "weapon", "tool", "musical instrument", "piece of furniture", "ancient artifact", "modern gadget",
    # Emotions and traits
    "emotion", "personality trait", "mood", "mental state", "virtue", "flaw",
    # Nature and elements
    "natural element", "weather phenomenon", "season", "time of day", "celestial body", "landscape",
    # Plants and vegetation
    "flower", "tree", "herb", "fruit", "vegetable", "mushroom",
    # Arts and culture
    "art style", "musical genre", "dance style", "literary genre", "architectural style", "fashion style",
    # Materials and textures
    "fabric", "metal", "stone", "wood type", "crystal", "texture",
    # Geography and places
    "country", "city type", "building", "room", "natural landmark", "climate zone",
    # History and eras
    "historical period", "ancient civilization", "mythology", "legend", "cultural tradition",
    # Science and cosmos
    "planet", "star type", "galaxy", "chemical element", "geometric shape", "mathematical concept",
    # Senses and perceptions
    "scent", "taste", "sound", "touch sensation", "visual effect", "temperature",
    # Energies and forces
    "type of energy", "natural force", "magical power", "spiritual element", "life force",
    # Professions and roles
    "profession", "fantasy role", "mythical being", "guardian spirit", "mentor figure",
    # Activities and actions
    "hobby", "sport", "art form", "ritual", "celebration", "journey type",
    # Abstractions and concepts
    "philosophical concept", "sin", "dream", "fear", "hope", "memory type",
    # Magical and fantastical objects
    "magical item", "enchanted object", "potion ingredient", "spell component", "rune",
    # Special environments
    "mystical place", "hidden realm", "sacred space", "forbidden zone", "lost city"
]

# Global tracker of the categories already used in the current conversation
used_categories = []

def get_next_category():
    """Return a random category that has not been used yet."""
    available_categories = [cat for cat in CHAT_CATEGORIES if cat not in used_categories]
    if not available_categories:
        # If every category has been used, reset the tracker
        used_categories.clear()
        available_categories = CHAT_CATEGORIES.copy()
    category = random.choice(available_categories)
    used_categories.append(category)
    return category

def simple_chat_response(user_message, history):
    """Simple chat logic without an LLM: just ask the next question."""
    if not user_message.strip():
        return "Please type your answer."
    # If this is the start of the conversation
    if len(history) == 0 or user_message.lower().strip() in ["ready", "start", "begin"]:
        used_categories.clear()  # Reset the categories
        category = get_next_category()
        return f"If you were {category}, what would you be?"
    # Otherwise, ask the next question
    category = get_next_category()
    return f"If you were {category}, what would you be?"

def load_flux_model():
    dtype = torch.bfloat16
    # For HuggingFace Spaces, prioritize CUDA
    if HF_SPACES and torch.cuda.is_available():
        device = "cuda"
    # For local development, prioritize MPS for Apple Silicon
    elif torch.backends.mps.is_available():
        device = "mps"
    elif torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"
    print(f"Using device for FLUX: {device}")
    pipe = DiffusionPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-schnell",
        torch_dtype=dtype
    ).to(device)
    return pipe
flux_pipe = load_flux_model()
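
# Note: FLUX.1-schnell is a timestep-distilled model, which is why the calls
# below use very few inference steps (4 or 8) and guidance_scale=0.0 (the
# schnell variant does not use classifier-free guidance).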

def generate_simple_flux_prompt(user_responses):
    """Generate a simple FLUX prompt by concatenating the user responses."""
    # Keep only the non-empty user answers (not the "if I was" prompts)
    responses = [response.strip() for response in user_responses if response.strip()]
    # Concatenate them with commas
    if responses:
        concatenated = ", ".join(responses)
        return f"digital portrait with the following criteria: {concatenated}"
    else:
        return "digital portrait with the following criteria: artistic avatar"

# Multilingual support
def get_translations():
    return {
        "en": {
            "title": "🎭 Avatar Generator - Chinese Portrait",
            "subtitle": "Complete at least the first 3 groups to generate your personalized avatar.",
            "portrait_title": "📝 Chinese Portrait (first 3 groups required)",
            "group": "Group",
            "required": "Required",
            "optional": "Optional",
            "if_i_was": "If I was",
            "i_would_be": "I would be",
            "generate_btn": "🎨 Generate Avatar",
            "avatar_title": "🖼️ Generated Avatar",
            "your_avatar": "Your Avatar",
            "information": "Information",
            "error_required": "Error: The first 3 groups of fields are required.",
            "success": "Avatar generated successfully!",
            "prompt_used": "Prompt used:",
            "error_generation": "Error during generation:",
            "footer": "Avatar generated with FLUX.1-schnell",
            "quality_normal": "Normal Quality (4 steps, 512x512)",
            "quality_high": "High Quality (8 steps, 512x512)",
            "quality_label": "Quality:",
            "tab_form": "📝 Form Mode",
            "tab_chat": "💬 Chat Mode",
            "chat_title": "🤖 AI Assistant - Avatar Creator",
            "chat_subtitle": "Let me guide you through creating your Chinese portrait!",
            "thinking": "Thinking...",
            "placeholders": {
                "animal": "an animal...",
                "animal_answer": "a lion...",
                "color": "a color...",
                "color_answer": "red...",
                "object": "an object...",
                "object_answer": "a sword...",
                "feeling": "a feeling...",
                "feeling_answer": "joy...",
                "element": "an element...",
                "element_answer": "fire..."
            }
        },
        "fr": {
            "title": "🎭 Générateur d'Avatar - Portrait Chinois",
            "subtitle": "Complétez au minimum les 3 premiers groupes pour générer votre avatar personnalisé.",
            "portrait_title": "📝 Portrait Chinois (3 premiers groupes obligatoires)",
            "group": "Groupe",
            "required": "Obligatoire",
            "optional": "Optionnel",
            "if_i_was": "Si j'étais",
            "i_would_be": "Je serais",
            "generate_btn": "🎨 Générer l'Avatar",
            "avatar_title": "🖼️ Avatar Généré",
            "your_avatar": "Votre Avatar",
            "information": "Informations",
            "error_required": "Erreur: Les 3 premiers groupes de champs sont obligatoires.",
            "success": "Avatar généré avec succès!",
            "prompt_used": "Prompt utilisé:",
            "error_generation": "Erreur lors de la génération:",
            "footer": "Avatar généré avec FLUX.1-schnell",
            "quality_normal": "Qualité Normale (4 étapes, 512x512)",
            "quality_high": "Haute Qualité (8 étapes, 512x512)",
            "quality_label": "Qualité:",
            "tab_form": "📝 Mode Formulaire",
            "tab_chat": "💬 Mode Chat",
            "chat_title": "🤖 Assistant IA - Créateur d'Avatar",
            "chat_subtitle": "Laissez-moi vous guider pour créer votre portrait chinois!",
            "thinking": "Réflexion...",
            "placeholders": {
                "animal": "un animal...",
                "animal_answer": "un lion...",
                "color": "une couleur...",
                "color_answer": "rouge...",
                "object": "un objet...",
                "object_answer": "une épée...",
                "feeling": "un sentiment...",
                "feeling_answer": "la joie...",
                "element": "un élément...",
                "element_answer": "le feu..."
            }
        }
    }

# Dev mode default values
def get_dev_defaults():
    return {
        "if1": "an animal", "would1": "a majestic wolf",
        "if2": "a color", "would2": "deep purple",
        "if3": "an object", "would3": "an ancient sword",
        "if4": "a feeling", "would4": "fierce determination",
        "if5": "an element", "would5": "lightning"
    }

# Apply the ZeroGPU decorator if available
if HF_SPACES:
    @spaces.GPU()
    def generate_avatar(if1: str, would1: str, if2: str, would2: str, if3: str, would3: str, if4: str = "", would4: str = "", if5: str = "", would5: str = "", language: str = "en", quality: str = "normal"):
        return _generate_avatar_impl(if1, would1, if2, would2, if3, would3, if4, would4, if5, would5, language, quality)
else:
    def generate_avatar(if1: str, would1: str, if2: str, would2: str, if3: str, would3: str, if4: str = "", would4: str = "", if5: str = "", would5: str = "", language: str = "en", quality: str = "normal"):
        return _generate_avatar_impl(if1, would1, if2, would2, if3, would3, if4, would4, if5, would5, language, quality)

# Conditional decorator expression (PEP 614, Python 3.9+): the ZeroGPU
# decorator on Spaces, a no-op wrapper when running locally.
@spaces.GPU() if HF_SPACES else lambda x: x
def _generate_avatar_impl(if1, would1, if2, would2, if3, would3, if4, would4, if5, would5, language, quality):
    translations = get_translations()
    t = translations.get(language, translations["en"])
    # Validate the required fields
    if not if1 or not would1 or not if2 or not would2 or not if3 or not would3:
        return None, t["error_required"]
    # Collect all user responses
    user_responses = [would1, would2, would3]
    if would4:
        user_responses.append(would4)
    if would5:
        user_responses.append(would5)
    # Generate the simple prompt
    prompt = generate_simple_flux_prompt(user_responses)
    try:
        # Configure according to the quality setting
        if quality == "high":
            width, height, steps = 512, 512, 8
        else:
            width, height, steps = 512, 512, 4
        # Generate with a random seed
        seed = random.randint(0, MAX_SEED)
        generator = torch.Generator(device=flux_pipe.device).manual_seed(seed)
        image = flux_pipe(
            prompt=prompt,
            width=width,
            height=height,
            num_inference_steps=steps,
            guidance_scale=0.0,
            generator=generator
        ).images[0]
        return image, f"{t['success']}\n{t['prompt_used']} {prompt}\nSeed: {seed}\nQuality: {quality} ({steps} steps, {width}x{height})"
    except Exception as e:
        return None, f"{t['error_generation']} {str(e)}"

@spaces.GPU() if HF_SPACES else lambda x: x
def generate_avatar_from_chat(history: list, language: str = "en", quality: str = "normal"):
    """
    Generate avatar from conversation history with AI assistant.
    """
    # Extract the user responses directly from the conversation
    user_responses = []
    for user_msg, assistant_msg in history:
        if user_msg and user_msg.strip() and user_msg.lower().strip() not in ["ready", "start", "begin", "let's start the chinese portrait game!"]:
            # Keep the user's answer
            user_responses.append(user_msg.strip())
    # Generate the simple prompt
    prompt = generate_simple_flux_prompt(user_responses)
    try:
        # Configure according to the quality setting
        if quality == "high":
            width, height, steps = 512, 512, 8
        else:
            width, height, steps = 512, 512, 4
        # Generate with a random seed
        seed = random.randint(0, MAX_SEED)
        generator = torch.Generator(device=flux_pipe.device).manual_seed(seed)
        image = flux_pipe(
            prompt=prompt,
            width=width,
            height=height,
            num_inference_steps=steps,
            guidance_scale=0.0,
            generator=generator
        ).images[0]
        responses_text = "\n".join([f"- {response}" for response in user_responses])
        return image, f"Avatar generated from conversation!\n\nUser responses:\n{responses_text}\n\nPrompt: {prompt}\nSeed: {seed}\nQuality: {quality} ({steps} steps, {width}x{height})"
    except Exception as e:
        return None, f"Error during generation: {str(e)}"

def create_form_interface(language="en"):
    translations = get_translations()
    t = translations.get(language, translations["en"])
    dev_defaults = get_dev_defaults() if DEV_MODE else {}
    with gr.Column() as form_interface:
        gr.Markdown(f"### {t['portrait_title']}")
        # Quality switch
        quality_radio = gr.Radio(
            choices=["normal", "high"],
            value="normal",
            label=t["quality_label"]
        )
        # Group 1 (required)
        gr.Markdown(f"**{t['group']} 1** ⭐ *{t['required']}*")
        with gr.Row():
            if1 = gr.Textbox(label=t["if_i_was"], placeholder=t["placeholders"]["animal"],
                             value=dev_defaults.get("if1", ""), scale=1)
            would1 = gr.Textbox(label=t["i_would_be"], placeholder=t["placeholders"]["animal_answer"],
                                value=dev_defaults.get("would1", ""), scale=1)
        # Group 2 (required)
        gr.Markdown(f"**{t['group']} 2** ⭐ *{t['required']}*")
        with gr.Row():
            if2 = gr.Textbox(label=t["if_i_was"], placeholder=t["placeholders"]["color"],
                             value=dev_defaults.get("if2", ""), scale=1)
            would2 = gr.Textbox(label=t["i_would_be"], placeholder=t["placeholders"]["color_answer"],
                                value=dev_defaults.get("would2", ""), scale=1)
        # Group 3 (required)
        gr.Markdown(f"**{t['group']} 3** ⭐ *{t['required']}*")
        with gr.Row():
            if3 = gr.Textbox(label=t["if_i_was"], placeholder=t["placeholders"]["object"],
                             value=dev_defaults.get("if3", ""), scale=1)
            would3 = gr.Textbox(label=t["i_would_be"], placeholder=t["placeholders"]["object_answer"],
                                value=dev_defaults.get("would3", ""), scale=1)
        # Group 4 (optional)
        gr.Markdown(f"**{t['group']} 4** ✨ *{t['optional']}*")
        with gr.Row():
            if4 = gr.Textbox(label=t["if_i_was"], placeholder=t["placeholders"]["feeling"],
                             value=dev_defaults.get("if4", ""), scale=1)
            would4 = gr.Textbox(label=t["i_would_be"], placeholder=t["placeholders"]["feeling_answer"],
                                value=dev_defaults.get("would4", ""), scale=1)
        # Group 5 (optional)
        gr.Markdown(f"**{t['group']} 5** ✨ *{t['optional']}*")
        with gr.Row():
            if5 = gr.Textbox(label=t["if_i_was"], placeholder=t["placeholders"]["element"],
                             value=dev_defaults.get("if5", ""), scale=1)
            would5 = gr.Textbox(label=t["i_would_be"], placeholder=t["placeholders"]["element_answer"],
                                value=dev_defaults.get("would5", ""), scale=1)
        generate_btn = gr.Button(t["generate_btn"], variant="primary", size="lg")
        gr.Markdown(f"### {t['avatar_title']}")
        output_image = gr.Image(label=t["your_avatar"], height=400)
        output_text = gr.Textbox(label=t["information"], lines=4, interactive=False)
        # Hidden state for language
        lang_state = gr.State(value=language)
        generate_btn.click(
            fn=generate_avatar,
            inputs=[if1, would1, if2, would2, if3, would3, if4, would4, if5, would5, lang_state, quality_radio],
            outputs=[output_image, output_text]
        )
    return form_interface

def create_chat_interface(language="en"):
    translations = get_translations()
    t = translations.get(language, translations["en"])
    with gr.Column() as chat_interface:
        gr.Markdown(f"### {t['chat_title']}")
        gr.Markdown(t["chat_subtitle"])
        chatbot = gr.Chatbot(height=400, show_copy_button=True)
        # Message area with send button
        with gr.Row():
            msg = gr.Textbox(label="Message", placeholder="Type your response here...", visible=False, scale=4)
            send_btn = gr.Button("📤", visible=False, scale=1, min_width=50)
        # Control buttons, below the chat
        with gr.Row():
            start_btn = gr.Button("🚀 Start New Conversation", variant="primary", scale=1)
            avatar_btn = gr.Button("🎨 Get My Avatar", variant="secondary", scale=1)
            quality_chat = gr.Radio(choices=["normal", "high"], value="normal", label="Quality", scale=1)
        # Avatar generation results
        avatar_output = gr.Image(label="Generated Avatar", visible=False)
        avatar_info = gr.Textbox(label="Avatar Info", lines=4, interactive=False, visible=False)
        # Hidden state for language
        lang_state = gr.State(value=language)

        def respond(message: str, history: list, language: str = "en"):
            """
            Process user message and generate simple response using get_next_category().
            """
            # Convert history format if needed
            if history is None:
                history = []
            # Use the simple chat logic (no LLM)
            response = simple_chat_response(message, history)
            # Update history with user message and bot response
            updated_history = history + [[message, response]]
            # Yield the updated history (no streaming needed for simple logic)
            yield "", updated_history

        def start_conversation(language):
            """Start the conversation with a simple question, without an LLM."""
            used_categories.clear()  # Reset the categories
            # Generate the first question directly
            first_category = get_next_category()
            first_question = f"If you were {first_category}, what would you be?"
            # Create the initial history
            initial_history = [["Let's start the Chinese Portrait game!", first_question]]
            return initial_history, gr.update(visible=True), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)

        def show_avatar_interface():
            """Immediately reveal the avatar widgets so the user can see that generation is running."""
            return gr.update(visible=True), gr.update(visible=True, value="Generating your avatar...")

        def generate_avatar_from_conversation(history, language, quality):
            if not history:
                return None, "No conversation found. Please start a conversation first."
            image, info = generate_avatar_from_chat(history, language, quality)
            return image, info

        # Events
        start_btn.click(
            fn=start_conversation,
            inputs=[lang_state],
            outputs=[chatbot, msg, send_btn, avatar_output, avatar_info]
        )
        # Send via Enter or the button
        msg.submit(
            respond,
            [msg, chatbot, lang_state],
            [msg, chatbot],
            queue=True
        )
        send_btn.click(
            respond,
            [msg, chatbot, lang_state],
            [msg, chatbot],
            queue=True
        )
        # Show the avatar widgets immediately, then generate
        avatar_btn.click(
            show_avatar_interface,
            outputs=[avatar_output, avatar_info]
        ).then(
            generate_avatar_from_conversation,
            inputs=[chatbot, lang_state, quality_chat],
            outputs=[avatar_output, avatar_info]
        )
        gr.Markdown("*Click 'Start New Conversation' to begin, then 'Get My Avatar' when you've completed your portrait!*")
    return chat_interface

# Create the main web interface with MCP tools integrated
with gr.Blocks(title="🎭 Avatar Generator") as demo:
    gr.Markdown("# 🎭 Avatar Generator - Chinese Portrait")
    gr.Markdown("Generate personalized avatars from Chinese portrait descriptions using FLUX.1-schnell")
    with gr.Tabs():
        # Main application tabs
        with gr.Tab("📝 Form Mode"):
            create_form_interface("en")
        with gr.Tab("💬 Chat Mode"):
            create_chat_interface("en")
    gr.Markdown("---")
    gr.Markdown("🔌 **MCP Integration**: This app exposes tools via MCP protocol at `/gradio_api/mcp/sse`")
    gr.Markdown("*Avatar generated with FLUX.1-schnell*")

if __name__ == "__main__":
    if DEV_MODE:
        print("🚀 Running in DEV MODE with pre-filled values")
    print("🔌 Starting server with MCP support...")
    print("📡 MCP endpoint available at: http://localhost:7860/gradio_api/mcp/sse")
    print("🌐 Web interface available at: http://localhost:7860")
    demo.launch(mcp_server=True, show_api=True)
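
# Minimal sketch of calling the generator programmatically once the server is
# running, via gradio_client. The api_name below is an assumption; check the
# app's "View API" page (enabled by show_api=True) for the exact endpoint
# name and argument order.
#
#   from gradio_client import Client
#   client = Client("http://localhost:7860")
#   image_path, info = client.predict(
#       "an animal", "a majestic wolf",   # group 1 (required)
#       "a color", "deep purple",         # group 2 (required)
#       "an object", "an ancient sword",  # group 3 (required)
#       "", "", "", "",                   # groups 4-5 (optional)
#       "en", "normal",                   # language, quality
#       api_name="/generate_avatar",      # hypothetical endpoint name
#   )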