# Spaces: VincentGOURBIN / IceBreaker-Avator-Generator
# Running on Zero
# File size: 22,700 Bytes

import gradio as gr
import torch
from diffusers import DiffusionPipeline
import random
import os
import sys
import time

# Enable PyTorch's MPS fallback for Apple Silicon compatibility.
# NOTE(review): `torch` is already imported above this line — confirm the flag
# is still honored when it is set after the import.
os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'

# Dev mode is requested with a `--dev` command-line argument.
DEV_MODE = any(arg == "--dev" for arg in sys.argv)

# The `spaces` package is used for HuggingFace deployment; when it cannot be
# imported the app considers itself to be running locally.
try:
    import spaces
except ImportError:
    HF_SPACES = False
    print("🏠 Running locally - spaces module not available")
else:
    HF_SPACES = True
    print("🚀 Running on HuggingFace Spaces with ZeroGPU")

    # Performance tuning for ZeroGPU
    torch.set_float32_matmul_precision('high')  # Enable TensorFloat32 for better performance
    torch.backends.cudnn.allow_tf32 = True      # Enable TF32 on cuDNN

# MCP is always enabled
print("🔌 MCP protocol enabled - tools available for external access")

# Upper bound (inclusive) for randomly drawn seeds: largest unsigned 32-bit value.
MAX_SEED = 0xFFFFFFFF

# Categories the chatbot can ask about (100+ varied categories).
# Every entry is unique so that each category has the same chance of being drawn.
CHAT_CATEGORIES = [
    # Animals and creatures
    "animal", "bird", "sea creature", "insect", "mythical creature", "prehistoric creature",

    # Colors and shades
    "color", "shade", "metallic color", "gemstone color",

    # Objects and artifacts
    "weapon", "tool", "musical instrument", "piece of furniture", "ancient artifact", "modern gadget",

    # Emotions and traits
    "emotion", "personality trait", "mood", "mental state", "virtue", "flaw",

    # Nature and elements
    "natural element", "weather phenomenon", "season", "time of day", "celestial body", "landscape",

    # Plants
    "flower", "tree", "herb", "fruit", "vegetable", "mushroom",

    # Arts and culture
    "art style", "musical genre", "dance style", "literary genre", "architectural style", "fashion style",

    # Materials and textures
    "fabric", "metal", "stone", "wood type", "crystal", "texture",

    # Geography and places
    "country", "city type", "building", "room", "natural landmark", "climate zone",

    # History and eras
    "historical period", "ancient civilization", "mythology", "legend", "cultural tradition",

    # Science and cosmos
    "planet", "star type", "galaxy", "chemical element", "geometric shape", "mathematical concept",

    # Senses and perceptions
    "scent", "taste", "sound", "touch sensation", "visual effect", "temperature",

    # Energies and forces
    "type of energy", "natural force", "magical power", "spiritual element", "life force",

    # Professions and roles
    "profession", "fantasy role", "mythical being", "guardian spirit", "mentor figure",

    # Activities and actions
    "hobby", "sport", "art form", "ritual", "celebration", "journey type",

    # Abstractions and concepts ("virtue" is already listed under emotions and traits)
    "philosophical concept", "sin", "dream", "fear", "hope", "memory type",

    # Magical and fantastic objects
    "magical item", "enchanted object", "potion ingredient", "spell component", "rune",

    # Special environments
    "mystical place", "hidden realm", "sacred space", "forbidden zone", "lost city"
]

# Module-level list tracking the categories already asked in the conversation
used_categories = []

def get_next_category():
    """Pick a random category that has not been asked yet and record it as used.

    Reads ``CHAT_CATEGORIES`` and mutates the module-level ``used_categories``
    list in place. Once every category has been used, the used list is emptied
    and the draw is made from the full list again.

    Returns:
        The chosen category string.
    """
    remaining = list(filter(lambda name: name not in used_categories, CHAT_CATEGORIES))

    if len(remaining) == 0:
        # Pool exhausted: start a fresh cycle over all categories
        used_categories.clear()
        remaining = list(CHAT_CATEGORIES)

    picked = random.choice(remaining)
    used_categories.append(picked)
    return picked

def simple_chat_response(user_message, history):
    """Return the next Chinese-portrait question (simple chat logic, no LLM).

    Args:
        user_message: Text typed by the user. ``None`` or a blank string is
            treated as "no answer".
        history: Conversation so far. An empty (or ``None``) history, or one of
            the keywords "ready" / "start" / "begin", restarts the game and
            clears the list of already-used categories.

    Returns:
        "Please type your answer." for a blank message, otherwise a question of
        the form "If you were a/an <category>, what would you be?".
    """
    if not user_message or not user_message.strip():
        return "Please type your answer."

    # Start of the conversation (or explicit restart keyword): reset the pool
    is_restart = not history or user_message.lower().strip() in ("ready", "start", "begin")
    if is_restart:
        used_categories.clear()

    category = get_next_category()
    # Categories are stored without an article; add one so the question reads
    # as proper English ("an animal", "a color").
    article = "an" if category.lower().startswith(("a", "e", "i", "o", "u")) else "a"
    return f"If you were {article} {category}, what would you be?"

def load_flux_model():
    """Load the FLUX.1-schnell pipeline in bfloat16 and move it to the chosen device.

    Device preference: CUDA when running on HuggingFace Spaces and CUDA is
    available; otherwise MPS if available; otherwise CUDA if available;
    otherwise CPU.

    Returns:
        The pipeline returned by ``DiffusionPipeline.from_pretrained(...).to(device)``.
    """
    def _pick_device():
        # On HuggingFace Spaces, CUDA takes priority
        if HF_SPACES and torch.cuda.is_available():
            return "cuda"
        # Local development: prefer MPS (Apple Silicon), then CUDA, then CPU
        if torch.backends.mps.is_available():
            return "mps"
        return "cuda" if torch.cuda.is_available() else "cpu"

    target = _pick_device()
    print(f"Using device for FLUX: {target}")

    pipeline = DiffusionPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-schnell",
        torch_dtype=torch.bfloat16,
    )
    return pipeline.to(target)

# The pipeline is loaded once, at import time, and shared by the generators below.
flux_pipe = load_flux_model()

def generate_simple_flux_prompt(user_responses):
    """Build the FLUX prompt by joining the user's non-blank answers with commas.

    Args:
        user_responses: Iterable of answer strings; each is stripped and blank
            entries are ignored.

    Returns:
        "digital portrait with the following criteria: <answers>", where
        <answers> is "artistic avatar" when no usable answer remains.
    """
    kept = []
    for answer in user_responses:
        trimmed = answer.strip()
        if trimmed:
            kept.append(trimmed)

    # Fall back to a generic description when there is nothing to join
    criteria = ", ".join(kept) if kept else "artistic avatar"
    return f"digital portrait with the following criteria: {criteria}"

# Multilingual support
def get_translations():
    """Return the UI string table, keyed by language code ("en" and "fr").

    Each language maps message keys to display strings, plus a nested
    "placeholders" dict holding the form's placeholder texts. A new dict is
    built on every call.
    """
    english = dict(
        title="🎭 Avatar Generator - Chinese Portrait",
        subtitle="Complete at least the first 3 groups to generate your personalized avatar.",
        portrait_title="📝 Chinese Portrait (first 3 groups required)",
        group="Group",
        required="Required",
        optional="Optional",
        if_i_was="If I was",
        i_would_be="I would be",
        generate_btn="🎨 Generate Avatar",
        avatar_title="🖼️ Generated Avatar",
        your_avatar="Your Avatar",
        information="Information",
        error_required="Error: The first 3 groups of fields are required.",
        success="Avatar generated successfully!",
        prompt_used="Prompt used:",
        error_generation="Error during generation:",
        footer="Avatar generated with FLUX.1-schnell",
        quality_normal="Normal Quality (4 steps, 512x512)",
        quality_high="High Quality (8 steps, 512x512)",
        quality_label="Quality:",
        tab_form="📝 Form Mode",
        tab_chat="💬 Chat Mode",
        chat_title="🤖 AI Assistant - Avatar Creator",
        chat_subtitle="Let me guide you through creating your Chinese portrait!",
        thinking="Thinking...",
    )
    english["placeholders"] = dict(
        animal="an animal...",
        animal_answer="a lion...",
        color="a color...",
        color_answer="red...",
        object="an object...",
        object_answer="a sword...",
        feeling="a feeling...",
        feeling_answer="joy...",
        element="an element...",
        element_answer="fire...",
    )

    french = dict(
        title="🎭 Générateur d'Avatar - Portrait Chinois",
        subtitle="Complétez au minimum les 3 premiers groupes pour générer votre avatar personnalisé.",
        portrait_title="📝 Portrait Chinois (3 premiers groupes obligatoires)",
        group="Groupe",
        required="Obligatoire",
        optional="Optionnel",
        if_i_was="Si j'étais",
        i_would_be="Je serais",
        generate_btn="🎨 Générer l'Avatar",
        avatar_title="🖼️ Avatar Généré",
        your_avatar="Votre Avatar",
        information="Informations",
        error_required="Erreur: Les 3 premiers groupes de champs sont obligatoires.",
        success="Avatar généré avec succès!",
        prompt_used="Prompt utilisé:",
        error_generation="Erreur lors de la génération:",
        footer="Avatar généré avec FLUX.1-schnell",
        quality_normal="Qualité Normale (4 étapes, 512x512)",
        quality_high="Haute Qualité (8 étapes, 512x512)",
        quality_label="Qualité:",
        tab_form="📝 Mode Formulaire",
        tab_chat="💬 Mode Chat",
        chat_title="🤖 Assistant IA - Créateur d'Avatar",
        chat_subtitle="Laissez-moi vous guider pour créer votre portrait chinois!",
        thinking="Réflexion...",
    )
    french["placeholders"] = dict(
        animal="un animal...",
        animal_answer="un lion...",
        color="une couleur...",
        color_answer="rouge...",
        object="un objet...",
        object_answer="une épée...",
        feeling="un sentiment...",
        feeling_answer="la joie...",
        element="un élément...",
        element_answer="le feu...",
    )

    return {"en": english, "fr": french}

# Dev mode default values
def get_dev_defaults():
    """Return sample form values keyed "if1".."if5" and "would1".."would5"."""
    sample_pairs = (
        ("an animal", "a majestic wolf"),
        ("a color", "deep purple"),
        ("an object", "an ancient sword"),
        ("a feeling", "fierce determination"),
        ("an element", "lightning"),
    )

    defaults = {}
    for number, (subject, answer) in enumerate(sample_pairs, start=1):
        defaults[f"if{number}"] = subject
        defaults[f"would{number}"] = answer
    return defaults

# Form-mode entry point; the ZeroGPU decorator is applied below when available
def generate_avatar(
    if1: str,
    would1: str,
    if2: str,
    would2: str,
    if3: str,
    would3: str,
    if4: str = "",
    would4: str = "",
    if5: str = "",
    would5: str = "",
    language: str = "en",
    quality: str = "normal",
):
    """Generate an avatar image from Chinese-portrait form answers.

    Thin wrapper that forwards every argument unchanged to
    ``_generate_avatar_impl``.

    Args:
        if1: "If I was ..." subject of group 1 (required).
        would1: "I would be ..." answer of group 1 (required).
        if2: Subject of group 2 (required).
        would2: Answer of group 2 (required).
        if3: Subject of group 3 (required).
        would3: Answer of group 3 (required).
        if4: Subject of group 4 (optional).
        would4: Answer of group 4 (optional).
        if5: Subject of group 5 (optional).
        would5: Answer of group 5 (optional).
        language: Language code for status messages; unknown codes use English.
        quality: "high" for 8 inference steps, any other value for 4.

    Returns:
        Whatever ``_generate_avatar_impl`` returns: an ``(image, message)``
        pair, with ``image`` set to ``None`` on failure.
    """
    return _generate_avatar_impl(
        if1, would1, if2, would2, if3, would3, if4, would4, if5, would5, language, quality
    )


if HF_SPACES:
    generate_avatar = spaces.GPU()(generate_avatar)

@spaces.GPU() if HF_SPACES else lambda x: x
def _generate_avatar_impl(if1, would1, if2, would2, if3, would3, if4, would4, if5, would5, language, quality):
    """Validate the form answers, build the prompt and run FLUX to produce an avatar.

    Args:
        if1, would1, if2, would2, if3, would3: Required subject/answer pairs.
            A value that is empty, ``None`` or only whitespace counts as missing.
        if4, would4, if5, would5: Optional pairs. Only the ``would`` answers
            are used in the prompt; the ``if`` subjects are not.
        language: Language code for status messages; unknown codes use English.
        quality: "high" selects 8 inference steps, any other value 4.
            The image is 512x512 in both cases.

    Returns:
        ``(image, message)``. ``image`` is ``None`` when a required field is
        missing or when generation raises; ``message`` then carries the
        translated error text.
    """
    translations = get_translations()
    t = translations.get(language, translations["en"])

    # Required-field validation: whitespace-only input is treated as missing,
    # otherwise it would pass here and then be silently dropped from the prompt.
    required_fields = (if1, would1, if2, would2, if3, would3)
    if any(not (value or "").strip() for value in required_fields):
        return None, t["error_required"]

    # Collect the user's answers (optional ones only when provided)
    user_responses = [would1, would2, would3]
    user_responses.extend(answer for answer in (would4, would5) if answer)

    prompt = generate_simple_flux_prompt(user_responses)

    # Quality only changes the number of inference steps
    width = height = 512
    steps = 8 if quality == "high" else 4

    try:
        # A fresh random seed per request; it is reported back in the message
        seed = random.randint(0, MAX_SEED)
        generator = torch.Generator(device=flux_pipe.device).manual_seed(seed)

        image = flux_pipe(
            prompt=prompt,
            width=width,
            height=height,
            num_inference_steps=steps,
            guidance_scale=0.0,
            generator=generator
        ).images[0]

        return image, f"{t['success']}\n{t['prompt_used']} {prompt}\nSeed: {seed}\nQuality: {quality} ({steps} steps, {width}x{height})"

    except Exception as e:
        # UI boundary: report the failure in the info box instead of raising
        return None, f"{t['error_generation']} {str(e)}"

@spaces.GPU() if HF_SPACES else lambda x: x
def generate_avatar_from_chat(history: list, language: str = "en", quality: str = "normal"):
    """
    Generate avatar from conversation history with AI assistant.

    Args:
        history: Conversation as a list of (user_message, assistant_message)
            pairs. Only the user messages are used; blank messages and the
            control phrases that start the game are skipped. ``None`` is
            treated as an empty conversation.
        language: Language code for the error message; unknown codes use English.
        quality: "high" selects 8 inference steps, any other value 4.
            The image is 512x512 in both cases.

    Returns:
        ``(image, message)``. On failure ``image`` is ``None`` and ``message``
        holds the translated error prefix followed by the exception text.
    """
    translations = get_translations()
    t = translations.get(language, translations["en"])

    # Messages that drive the game rather than answer a question
    control_messages = {"ready", "start", "begin", "let's start the chinese portrait game!"}

    # Pull the user's answers straight out of the conversation
    user_responses = []
    for user_msg, _assistant_msg in (history or []):
        cleaned = user_msg.strip() if user_msg else ""
        if cleaned and cleaned.lower() not in control_messages:
            user_responses.append(cleaned)

    prompt = generate_simple_flux_prompt(user_responses)

    # Quality only changes the number of inference steps
    width = height = 512
    steps = 8 if quality == "high" else 4

    try:
        # A fresh random seed per request; it is reported back in the message
        seed = random.randint(0, MAX_SEED)
        generator = torch.Generator(device=flux_pipe.device).manual_seed(seed)

        image = flux_pipe(
            prompt=prompt,
            width=width,
            height=height,
            num_inference_steps=steps,
            guidance_scale=0.0,
            generator=generator
        ).images[0]

        responses_text = "\n".join(f"- {response}" for response in user_responses)

        return image, f"Avatar generated from conversation!\n\nUser responses:\n{responses_text}\n\nPrompt: {prompt}\nSeed: {seed}\nQuality: {quality} ({steps} steps, {width}x{height})"

    except Exception as e:
        # UI boundary: report the failure in the info box instead of raising.
        # The prefix comes from the translation table, as in the form mode.
        return None, f"{t['error_generation']} {str(e)}"

def create_form_interface(language="en"):
    """Build the form-mode UI: five "if I was / I would be" groups and the generate button.

    Args:
        language: Language code for labels and placeholders; unknown codes use
            English. The value is also passed to ``generate_avatar`` on click.

    Returns:
        The ``gr.Column`` that contains the whole form.
    """
    all_translations = get_translations()
    t = all_translations.get(language, all_translations["en"])
    placeholders = t["placeholders"]
    # Pre-filled sample values are only used in dev mode
    prefill = get_dev_defaults() if DEV_MODE else {}

    # (placeholder key, required?) for groups 1..5, in display order
    group_specs = (
        ("animal", True),
        ("color", True),
        ("object", True),
        ("feeling", False),
        ("element", False),
    )

    with gr.Column() as form_interface:
        gr.Markdown(f"### {t['portrait_title']}")

        # Quality switch
        quality_radio = gr.Radio(
            choices=["normal", "high"],
            value="normal",
            label=t["quality_label"]
        )

        # Textboxes in the order generate_avatar expects: if1, would1, ..., if5, would5
        answer_fields = []
        for number, (topic, is_required) in enumerate(group_specs, start=1):
            if is_required:
                heading = f"**{t['group']} {number}** ⭐ *{t['required']}*"
            else:
                heading = f"**{t['group']} {number}** ✨ *{t['optional']}*"
            gr.Markdown(heading)

            with gr.Row():
                if_box = gr.Textbox(
                    label=t["if_i_was"],
                    placeholder=placeholders[topic],
                    value=prefill.get(f"if{number}", ""),
                    scale=1,
                )
                would_box = gr.Textbox(
                    label=t["i_would_be"],
                    placeholder=placeholders[f"{topic}_answer"],
                    value=prefill.get(f"would{number}", ""),
                    scale=1,
                )
            answer_fields.extend([if_box, would_box])

        generate_btn = gr.Button(t["generate_btn"], variant="primary", size="lg")

        gr.Markdown(f"### {t['avatar_title']}")
        output_image = gr.Image(label=t["your_avatar"], height=400)
        output_text = gr.Textbox(label=t["information"], lines=4, interactive=False)

        # Hidden state carrying the language to the click handler
        lang_state = gr.State(value=language)

        generate_btn.click(
            fn=generate_avatar,
            inputs=answer_fields + [lang_state, quality_radio],
            outputs=[output_image, output_text]
        )

    return form_interface

def create_chat_interface(language="en"):
    """Build the chat-mode UI: a question/answer chat plus avatar generation controls.

    Args:
        language: Language code for the title and subtitle; unknown codes use
            English. The value is also passed to the event handlers.

    Returns:
        The ``gr.Column`` that contains the whole chat interface.
    """
    translations = get_translations()
    t = translations.get(language, translations["en"])

    with gr.Column() as chat_interface:
        gr.Markdown(f"### {t['chat_title']}")
        gr.Markdown(t["chat_subtitle"])

        chatbot = gr.Chatbot(height=400, show_copy_button=True)

        # Message box with its send button (both hidden until a conversation starts)
        with gr.Row():
            msg = gr.Textbox(label="Message", placeholder="Type your response here...", visible=False, scale=4)
            send_btn = gr.Button("📤", visible=False, scale=1, min_width=50)

        # Control buttons, placed below the chat
        with gr.Row():
            start_btn = gr.Button("🚀 Start New Conversation", variant="primary", scale=1)
            avatar_btn = gr.Button("🎨 Get My Avatar", variant="secondary", scale=1)
            quality_chat = gr.Radio(choices=["normal", "high"], value="normal", label="Quality", scale=1)

        # Avatar generation results (hidden until the avatar is requested)
        avatar_output = gr.Image(label="Generated Avatar", visible=False)
        avatar_info = gr.Textbox(label="Avatar Info", lines=4, interactive=False, visible=False)

        # Hidden state for language
        lang_state = gr.State(value=language)

        def respond(message: str, history: list, language: str = "en"):
            """
            Process user message and generate simple response using get_next_category().

            Args:
                message: Text the user just submitted.
                history: Conversation so far as [user, assistant] pairs, or None.
                language: Language code (currently not used by the reply logic).

            Yields:
                An empty string (clears the message box) and the history
                extended with the new [message, reply] pair.
            """
            if history is None:
                history = []

            # Simple rule-based reply, no LLM involved
            response = simple_chat_response(message, history)

            updated_history = history + [[message, response]]

            # Single yield: there is nothing to stream with the simple logic
            yield "", updated_history

        def start_conversation(language):
            """
            Start a new conversation with a first question (no LLM involved).

            Args:
                language: Language code (currently not used).

            Returns:
                The initial history, followed by visibility updates that show
                the message box and send button and hide the avatar outputs.
            """
            # With an empty history, simple_chat_response resets the used
            # categories and returns the opening question, so the question
            # wording lives in one place only.
            first_question = simple_chat_response("start", [])

            initial_history = [["Let's start the Chinese Portrait game!", first_question]]

            return initial_history, gr.update(visible=True), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)

        def show_avatar_interface():
            """Reveal the avatar outputs right away so the user sees that generation is running."""
            return gr.update(visible=True), gr.update(visible=True, value="Generating your avatar...")

        def generate_avatar_from_conversation(history, language, quality):
            """
            Generate the avatar from the current chat history.

            Args:
                history: Conversation as [user, assistant] pairs.
                language: Language code forwarded to generate_avatar_from_chat.
                quality: Quality setting forwarded to generate_avatar_from_chat.

            Returns:
                (image, message); image is None when there is no conversation yet.
            """
            if not history:
                return None, "No conversation found. Please start a conversation first."

            return generate_avatar_from_chat(history, language, quality)

        # Event wiring
        start_btn.click(
            fn=start_conversation,
            inputs=[lang_state],
            outputs=[chatbot, msg, send_btn, avatar_output, avatar_info]
        )

        # Send with Enter or with the button
        msg.submit(
            respond,
            [msg, chatbot, lang_state],
            [msg, chatbot],
            queue=True
        )

        send_btn.click(
            respond,
            [msg, chatbot, lang_state],
            [msg, chatbot],
            queue=True
        )

        # Show the output area immediately, then run the generation
        avatar_btn.click(
            show_avatar_interface,
            outputs=[avatar_output, avatar_info]
        ).then(
            generate_avatar_from_conversation,
            inputs=[chatbot, lang_state, quality_chat],
            outputs=[avatar_output, avatar_info]
        )

        gr.Markdown("*Click 'Start New Conversation' to begin, then 'Get My Avatar' when you've completed your portrait!*")

    return chat_interface

# Assemble the main web interface: a heading, one tab per input mode, then footer notes
with gr.Blocks(title="🎭 Avatar Generator") as demo:
    gr.Markdown("# 🎭 Avatar Generator - Chinese Portrait")
    gr.Markdown("Generate personalized avatars from Chinese portrait descriptions using FLUX.1-schnell")

    with gr.Tabs():
        # Main application tabs, in display order: (tab label, builder)
        for _tab_label, _build_tab in (
            ("📝 Form Mode", create_form_interface),
            ("💬 Chat Mode", create_chat_interface),
        ):
            with gr.Tab(_tab_label):
                _build_tab("en")

    # Footer: separator, MCP endpoint note, model credit
    for _footer_line in (
        "---",
        "🔌 **MCP Integration**: This app exposes tools via MCP protocol at `/gradio_api/mcp/sse`",
        "*Avatar generated with FLUX.1-schnell*",
    ):
        gr.Markdown(_footer_line)

if __name__ == "__main__":
    # Startup banner: the dev-mode notice comes first when applicable
    startup_lines = ["🚀 Running in DEV MODE with pre-filled values"] if DEV_MODE else []
    startup_lines += [
        "🔌 Starting server with MCP support...",
        "📡 MCP endpoint available at: http://localhost:7860/gradio_api/mcp/sse",
        "🌐 Web interface available at: http://localhost:7860",
    ]
    for banner_line in startup_lines:
        print(banner_line)

    # Launch the app with the MCP server and the API page enabled
    demo.launch(mcp_server=True, show_api=True)