|
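"""Gradio chat interface for the Vera-v1.5-Instruct GGUF model.

The quantised weights are downloaded from the Hugging Face Hub, loaded with
llama-cpp-python, and served through a streaming Chatbot UI with adjustable
generation parameters.
"""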
import gradio as gr |
|
import time |
|
import os |
|
from functools import lru_cache |
|
from llama_cpp import Llama |
|
from huggingface_hub import hf_hub_download |
|
import threading |
|
|
|
|
|
MODEL_NAME = "Dorian2B/Vera-v1.5-Instruct-2B-GGUF" |
|
MODEL_FILE = "Vera-v1.5-Instruct-q5_K_M.gguf" |
|
MODEL_CACHE_DIR = os.path.join(os.path.expanduser("~"), ".cache", "vera-model") |
|
|
|
|
|
os.makedirs(MODEL_CACHE_DIR, exist_ok=True) |
|
|
|
|
|
@lru_cache(maxsize=1) |
|
def download_model(): |
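    """Download the GGUF weights from the Hugging Face Hub (path cached between calls)."""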
|
print("Téléchargement du modèle en cours...") |
|
model_path = hf_hub_download( |
|
repo_id=MODEL_NAME, |
|
filename=MODEL_FILE, |
|
cache_dir=MODEL_CACHE_DIR |
|
) |
|
print(f"Modèle téléchargé à {model_path}") |
|
return model_path |
|
|
|
|
|
def load_model_async(): |
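    """Download the model if needed, then load it into the module-level `model`."""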
|
global model |
|
model_path = download_model() |
|
|
|
|
|
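    # Load the weights with llama.cpp: 4096-token context, batch size 512,
    # all layers offloaded to the GPU when one is available.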
model = Llama( |
|
model_path=model_path, |
|
n_ctx=4096, |
|
n_batch=512, |
|
n_gpu_layers=-1, |
|
verbose=False, |
|
seed=42 |
|
) |
|
print("Modèle chargé avec succès!") |
|
|
|
|
|
def format_prompt(message, history): |
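    """Build the model's chat prompt from the conversation history.

    The prompt alternates <|system|>, <|user|> and <|assistant|> blocks, each
    terminated by </s>, and ends with an open <|assistant|> block for the model
    to complete.
    """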
|
|
|
prompt_parts = ["<|system|>\nTu es Vera, une assistante IA utile, honnête et inoffensive.\n</s>\n"] |
|
|
|
|
|
for user_msg, assistant_msg in history: |
|
prompt_parts.append(f"<|user|>\n{user_msg}\n</s>\n") |
|
prompt_parts.append(f"<|assistant|>\n{assistant_msg}\n</s>\n") |
|
|
|
|
|
prompt_parts.append(f"<|user|>\n{message}\n</s>\n<|assistant|>\n") |
|
|
|
return "".join(prompt_parts) |
|
|
|
|
|
model = None |
|
is_model_loading = False |
|
model_lock = threading.Lock() |
|
|
|
|
|
def generate_response(message, history, temperature=0.7, top_p=0.95, max_tokens=2048): |
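    """Stream the assistant's reply, yielding the messages-format history after each new chunk."""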
|
global model, is_model_loading |
|
|
|
|
|
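    # Lazily load the model on the first request; the lock serialises concurrent first calls.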
with model_lock: |
|
if model is None: |
|
if not is_model_loading: |
|
is_model_loading = True |
|
|
|
yield [{"role": "user", "content": message}, |
|
{"role": "assistant", "content": "Chargement du modèle en cours... Veuillez patienter."}] |
|
load_model_async() |
|
is_model_loading = False |
|
else: |
|
yield [{"role": "user", "content": message}, |
|
{"role": "assistant", "content": "Le modèle est en cours de chargement. Veuillez patienter."}] |
|
return |
|
|
|
|
|
if not message.strip(): |
|
return |
|
|
|
|
|
    # Normalise the history into [user, assistant] pairs, whether Gradio passes
    # "messages"-style dicts or legacy (user, assistant) tuples/lists.
    history_tuples = []
    if history:
        for msg in history:
            if isinstance(msg, dict):
                if msg["role"] == "user":
                    history_tuples.append([msg["content"], ""])
                elif msg["role"] == "assistant" and history_tuples:
                    history_tuples[-1][1] = msg["content"]
            elif isinstance(msg, (tuple, list)):
                history_tuples.append(list(msg))
|
|
|
|
|
new_history_tuples = history_tuples + [(message, "")] |
|
|
|
|
|
prompt = format_prompt(message, history_tuples) |
|
|
|
response_text = "" |
|
|
|
try: |
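        # Stream the completion token by token and re-emit the updated chat after each chunk.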
|
|
|
for token in model.create_completion( |
|
prompt, |
|
max_tokens=max_tokens, |
|
temperature=temperature, |
|
top_p=top_p, |
|
top_k=40, |
|
repeat_penalty=1.1, |
|
stop=["</s>", "<|user|>", "<|system|>"], |
|
stream=True, |
|
): |
|
chunk = token["choices"][0]["text"] |
|
response_text += chunk |
|
|
|
|
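            # Rebuild the messages-format history, inserting the partial reply for the current turn.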
|
new_history_messages = [] |
|
for i, (usr_msg, ast_msg) in enumerate(new_history_tuples): |
|
new_history_messages.append({"role": "user", "content": usr_msg}) |
|
|
|
if i == len(new_history_tuples) - 1: |
|
new_history_messages.append({"role": "assistant", "content": response_text}) |
|
elif ast_msg: |
|
new_history_messages.append({"role": "assistant", "content": ast_msg}) |
|
|
|
yield new_history_messages |
|
time.sleep(0.01) |
|
except Exception as e: |
|
|
|
error_message = f"Erreur pendant la génération: {str(e)}" |
|
yield [{"role": "user", "content": message}, |
|
{"role": "assistant", "content": error_message}] |
|
|
|
|
|
def reset_conversation(): |
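    """Clear the chat history and the input box."""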
|
return [], "" |
|
|
|
|
|
def update_params(temp, top_p, max_len):
    """Refresh the summary label with the currently selected generation parameters."""
    return gr.update(value=f"Température: {temp}, Top-p: {top_p}, Longueur max: {max_len}")
|
|
|
|
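# Custom CSS injected into the page: dark background, chat bubbles, button styles, responsive tweaks.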
|
custom_css = """ |
|
/* Hide the Gradio footer */
|
footer {display: none !important} |
|
|
|
/* Global container style */
|
.gradio-container { |
|
font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif; |
|
background: #050f24; |
|
min-height: 100vh; |
|
} |
|
|
|
/* Headings */
|
h1, h2, h3 { |
|
font-weight: 700; |
|
color: #f3f4f6; |
|
} |
|
|
|
/* Chat area styling */
|
.chatbot-container { |
|
border-radius: 16px; |
|
box-shadow: 0 8px 24px rgba(0, 0, 0, 0.12); |
|
background-color: white; |
|
overflow: hidden; |
|
} |
|
|
|
/* User messages */
|
.chatbot .user-message { |
|
background: linear-gradient(135deg, #6366f1, #8b5cf6); |
|
color: white; |
|
border-radius: 16px 16px 2px 16px; |
|
padding: 12px 16px; |
|
box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1); |
|
max-width: 80%; |
|
margin: 8px 0; |
|
} |
|
|
|
/* Assistant messages */
|
.chatbot .bot-message { |
|
background: #f3f4f6; |
|
color: #111827; |
|
border-radius: 16px 16px 16px 2px; |
|
padding: 12px 16px; |
|
box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05); |
|
max-width: 80%; |
|
margin: 8px 0; |
|
} |
|
|
|
/* Text input area */
|
.input-area { |
|
border-radius: 12px; |
|
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05); |
|
border: 1px solid #e5e7eb; |
|
transition: all 0.3s ease; |
|
} |
|
.input-area:focus { |
|
border-color: #6366f1; |
|
box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.2); |
|
} |
|
|
|
/* Buttons */
|
.primary-btn { |
|
background: linear-gradient(135deg, #6366f1, #8b5cf6); |
|
border: none; |
|
border-radius: 8px; |
|
color: white; |
|
font-weight: 600; |
|
padding: 10px 16px; |
|
cursor: pointer; |
|
transition: all 0.3s ease; |
|
box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1); |
|
} |
|
.primary-btn:hover { |
|
transform: translateY(-2px); |
|
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15); |
|
} |
|
.secondary-btn { |
|
background-color: #f3f4f6; |
|
border: 1px solid #e5e7eb; |
|
border-radius: 8px; |
|
color: #4b5563; |
|
font-weight: 500; |
|
padding: 10px 16px; |
|
cursor: pointer; |
|
transition: all 0.3s ease; |
|
} |
|
.secondary-btn:hover { |
|
background-color: #e5e7eb; |
|
} |
|
|
|
/* Custom accordion */
|
.accordion { |
|
border-radius: 8px; |
|
border: 1px solid #e5e7eb; |
|
margin-top: 16px; |
|
} |
|
.accordion-header { |
|
padding: 12px; |
|
font-weight: 600; |
|
color: #4b5563; |
|
} |
|
|
|
/* Loading animation */
|
@keyframes pulse { |
|
0% {opacity: 0.6;} |
|
50% {opacity: 1;} |
|
100% {opacity: 0.6;} |
|
} |
|
.loading-indicator { |
|
animation: pulse 1.5s infinite; |
|
display: inline-block; |
|
margin-right: 8px; |
|
} |
|
|
|
/* Parameters panel */
|
.parameter-container { |
|
background-color: #111827; |
|
border-radius: 12px; |
|
padding: 16px; |
|
margin-top: 12px; |
|
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05); |
|
} |
|
.parameter-slider { |
|
margin: 8px 0; |
|
} |
|
|
|
/* Badge styling */
|
.model-badge { |
|
display: inline-block; |
|
background-color: #818cf8; |
|
color: white; |
|
padding: 4px 8px; |
|
border-radius: 6px; |
|
font-size: 0.8rem; |
|
font-weight: 500; |
|
margin-right: 8px; |
|
} |
|
|
|
/* Model status */
|
.model-status { |
|
font-size: 0.9rem; |
|
color: #4b5563; |
|
margin-bottom: 12px; |
|
} |
|
.status-loaded { |
|
color: #10b981; |
|
} |
|
.status-loading { |
|
color: #f59e0b; |
|
} |
|
|
|
/* Responsive adjustments */
|
@media (max-width: 768px) { |
|
.gradio-container { |
|
padding: 12px; |
|
} |
|
.chatbot .user-message, .chatbot .bot-message { |
|
max-width: 90%; |
|
} |
|
} |
|
""" |
|
|
|
|
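# Gradio theme aligned with the custom CSS (indigo/purple accents, rounded corners, soft shadows).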
|
theme = gr.themes.Base( |
|
primary_hue="indigo", |
|
secondary_hue="purple", |
|
neutral_hue="slate", |
|
font=["Inter", "ui-sans-serif", "system-ui", "sans-serif"], |
|
font_mono=["Fira Code", "ui-monospace", "monospace"], |
|
).set( |
|
button_primary_background_fill="*primary_500", |
|
button_primary_background_fill_hover="*primary_600", |
|
button_primary_text_color="white", |
|
button_secondary_background_fill="*neutral_100", |
|
button_secondary_background_fill_hover="*neutral_200", |
|
button_secondary_text_color="*neutral_800", |
|
block_radius="12px", |
|
block_shadow="0 2px 8px rgba(0, 0, 0, 0.1)", |
|
input_radius="8px", |
|
input_shadow="0 2px 4px rgba(0, 0, 0, 0.05)", |
|
input_border_width="1px" |
|
) |
|
|
|
|
|
with gr.Blocks(css=custom_css, theme=theme) as demo: |
|
|
|
model_status = gr.State("non chargé") |
|
|
|
with gr.Row(): |
|
gr.Markdown(""" |
|
# 🌟 Vera - Assistant IA Français |
|
|
|
Un assistant conversationnel basé sur le modèle **Vera-v1.5-Instruct** optimisé pour le français. |
|
""") |
|
|
|
with gr.Row(): |
|
|
|
with gr.Column(scale=4): |
|
|
|
status_indicator = gr.Markdown("💤 **Modèle**: En attente de chargement", elem_id="model-status") |
|
|
|
|
|
chatbot = gr.Chatbot( |
|
height=600, |
|
show_copy_button=True, |
|
avatar_images=("👤", "🤖"), |
|
type="messages", |
|
elem_id="chatbot", |
|
container=True, |
|
elem_classes="chatbot-container", |
|
) |
|
|
|
|
|
with gr.Row(): |
|
message = gr.Textbox( |
|
placeholder="Posez votre question à Vera...", |
|
lines=2, |
|
max_lines=10, |
|
container=True, |
|
elem_classes="input-area", |
|
scale=4, |
|
autofocus=True, |
|
) |
|
|
|
with gr.Row(): |
|
with gr.Column(scale=3): |
|
submit_btn = gr.Button("Envoyer", elem_classes="primary-btn") |
|
with gr.Column(scale=1): |
|
reset_btn = gr.Button("Nouvelle conversation", elem_classes="secondary-btn") |
|
|
|
|
|
with gr.Column(scale=1): |
|
with gr.Group(elem_classes="parameter-container"): |
|
gr.Markdown("### ⚙️ Paramètres") |
|
|
|
temperature = gr.Slider( |
|
minimum=0.1, |
|
maximum=1.0, |
|
value=0.7, |
|
step=0.1, |
|
label="Température", |
|
info="Contrôle la créativité (plus élevé = plus créatif)", |
|
elem_classes="parameter-slider" |
|
) |
|
|
|
top_p = gr.Slider( |
|
minimum=0.5, |
|
maximum=1.0, |
|
value=0.95, |
|
step=0.05, |
|
label="Top-p", |
|
info="Contrôle la diversité des réponses", |
|
elem_classes="parameter-slider" |
|
) |
|
|
|
max_tokens = gr.Slider( |
|
minimum=256, |
|
maximum=4096, |
|
value=2048, |
|
step=256, |
|
label="Longueur maximale", |
|
info="Nombre maximum de tokens générés", |
|
elem_classes="parameter-slider" |
|
) |
|
|
|
apply_params = gr.Button("Appliquer", elem_classes="secondary-btn") |
|
params_info = gr.Markdown("Température: 0.7, Top-p: 0.95, Longueur max: 2048") |
|
|
|
with gr.Accordion("ℹ️ À propos du modèle", open=False, elem_classes="accordion"): |
|
gr.Markdown(""" |
|
### Vera v1.5 Instruct |
|
|
|
<div class="model-badge">GGUF</div> <div class="model-badge">Français</div> <div class="model-badge">2B</div> |
|
|
|
Ce modèle est basé sur **Vera-v1.5-Instruct-GGUF** développé par [Dorian2B](https://huggingface.co/Dorian2B/Vera-v1.5-Instruct-GGUF). |
|
|
|
**Caractéristiques:** |
|
- Modèle optimisé pour les conversations en français |
|
- Basé sur l'architecture Gemma2 |
|
- Support de contexte jusqu'à 8192 tokens |
|
- Quantifié en 5 bits (q5_K_M) pour de meilleures performances
|
|
|
**Conseils d'utilisation:** |
|
- Posez des questions claires et précises |
|
- Pour de meilleurs résultats, ajustez la température selon vos besoins |
|
- Utilisez le bouton "Nouvelle conversation" pour réinitialiser le contexte |
|
""") |
|
|
|
|
|
|
|
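    # Wire up the events: parameter summary, send button, Enter-to-send, and conversation reset.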
apply_params.click( |
|
fn=update_params, |
|
inputs=[temperature, top_p, max_tokens], |
|
outputs=[params_info] |
|
) |
|
|
|
|
|
submit_btn.click( |
|
fn=generate_response, |
|
inputs=[message, chatbot, temperature, top_p, max_tokens], |
|
outputs=[chatbot] |
|
).then( |
|
fn=lambda: "", |
|
outputs=[message] |
|
).then( |
|
fn=lambda: gr.update(value="🟢 **Modèle**: Chargé et prêt"), |
|
outputs=[status_indicator] |
|
) |
|
|
|
|
|
message.submit( |
|
fn=generate_response, |
|
inputs=[message, chatbot, temperature, top_p, max_tokens], |
|
outputs=[chatbot] |
|
).then( |
|
fn=lambda: "", |
|
outputs=[message] |
|
).then( |
|
fn=lambda: gr.update(value="🟢 **Modèle**: Chargé et prêt"), |
|
outputs=[status_indicator] |
|
) |
|
|
|
|
|
reset_btn.click( |
|
fn=reset_conversation, |
|
outputs=[chatbot, message] |
|
) |
|
|
|
|
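# Start downloading/loading the model in a background thread so the UI comes up immediately.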
|
threading.Thread(target=load_model_async, daemon=True).start() |
|
|
|
|
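# Enable request queuing (used for the streaming responses) and launch the app.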
|
if __name__ == "__main__": |
|
demo.queue() |
|
demo.launch( |
|
share=True, |
|
show_error=True, |
|
debug=False, |
|
max_threads=4, |
|
) |