Spaces:

suayptalha
/

QwQ-32B-Preview-Vision

Running

App Files Files Community

suayptalha committed on Dec 18, 2024

Commit

4e4190b

verified ·

1 Parent(s): 8f1cf32

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -63

app.py CHANGED Viewed

@@ -5,17 +5,16 @@ from huggingface_hub import InferenceClient
 # Use Client for Moondream2
 moondream_client = Client("vikhyatk/moondream2")
-# Use InferenceClient for Qwen/QwQ-32B-Preview
 llama_client = InferenceClient("Qwen/QwQ-32B-Preview")
-# Chat history
 history = []
 # Image description function
-def describe_image(image, user_message, history):
-    # Check whether an image was provided
-    if image is None:
-        return "No image provided", history  # Return an error message
     # Send the image to the Moondream2 API
     result = moondream_client.predict(
         img=handle_file(image),
@@ -23,70 +22,53 @@ def describe_image(image, user_message, history):
         api_name="/answer_question"
     )
-    description = result  # Take the description from Moondream2
-    history.append({"role": "user", "content": user_message})  # as a string
-    history.append({"role": "assistant", "content": description})  # as a string
-    return description, history
-# Chat function driven by text and history
-def chat_with_text(user_message, history, max_new_tokens=250):
-    # Add the user's message to the history
-    history.append({"role": "user", "content": user_message})  # as a string
-    # Send the whole history to Qwen/QwQ-32B-Preview
-    texts = [{"role": msg["role"], "content": msg["content"]} for msg in history]
     llama_result = llama_client.chat_completion(
-        messages=texts,
-        max_tokens=max_new_tokens,
-        temperature=0.7,
-        top_p=0.95
     )
-    # Take the assistant's reply and add it to the history
-    assistant_reply = llama_result["choices"][0]["message"]["content"]
-    history.append({"role": "assistant", "content": assistant_reply})  # as a string
-    return assistant_reply, history
-# Image- and/or text-based chat function
-def bot_streaming(message, history=None, max_new_tokens=250):
-    if history is None:  # Use an empty list when no `history` is given
-        history = []
-    user_message = message.get("text", "")
-    image = message.get("image", None)
-    if image:  # An image is present
-        response, history = describe_image(image, user_message, history)
-    else:  # Text-only message
-        response, history = chat_with_text(user_message, history, max_new_tokens)
-    # Only the text should be returned, not the history
-    # NOTE(review): the return below still includes the history - confirm intent
-    return response, history
 # Gradio interface
-demo = gr.ChatInterface(
-    fn=bot_streaming,
-    title="Multimodal Chat Assistant",
-    additional_inputs=[
-        gr.Slider(
-            minimum=10,
-            maximum=500,
-            value=250,
-            step=10,
-            label="Maximum number of new tokens to generate",
-        )
     ],
-    description=(
-        "This demo combines text and image understanding using Moondream2 for visual "
-        "tasks and Qwen/QwQ-32B-Preview for conversational AI. Upload an image, ask questions, "
-        "or just chat!"
-    ),
-    stop_btn="Stop Generation",
-    fill_height=True,
-    multimodal=True,
 )
-if __name__ == "__main__":
-    demo.launch(debug=True)

 # Use Client for Moondream2
 moondream_client = Client("vikhyatk/moondream2")
+# Use InferenceClient for the chat model (Qwen/QwQ-32B-Preview, despite the "llama" name)
 llama_client = InferenceClient("Qwen/QwQ-32B-Preview")
+# Module-level list that holds the chat history
 history = []
 # Image description function
+def describe_image(image, user_message):
+    """Query Moondream2 about the image, then ask the chat model to respond.
+
+    Appends the user's message and Moondream2's answer to the module-level
+    ``history`` list, sends the joined history to the chat model as a single
+    user message, and returns Moondream2's answer followed by a blank line,
+    an "Assistant: " label and the chat model's reply.
+    """
+    global history
     # Send the image to the Moondream2 API
     result = moondream_client.predict(
         img=handle_file(image),
         api_name="/answer_question"
     )
+    # Treat Moondream2's answer as the image description
+    description = result  # Moondream2's reply
+    # Record the user's message and the description in the history
+    history.append(f"User: {user_message}")
+    history.append(f"Assistant: {description}")
+    # Join the chat history and send it to the chat model as a single message
+    full_conversation = "\n".join(history)
     llama_result = llama_client.chat_completion(
+        messages=[{"role": "user", "content": full_conversation}],
+        max_tokens=512,  # The token limit can be set here
+        temperature=0.7,  # Sampling temperature
+        top_p=0.95  # top_p for nucleus sampling
     )
+    # Return the result
+    return description + "\n\nAssistant: " + llama_result['choices'][0]['message']['content']
+# Chat entry point: routes on whether an image was uploaded
+def chat_or_image(image, user_message):
+    """Answer ``user_message``, using the image when one is supplied.
+
+    Args:
+        image: Path of the uploaded image, or a falsy value when none was given.
+        user_message: Text typed by the user.
+
+    Returns:
+        The text to display: whatever ``describe_image`` returns when an image
+        is present, otherwise the chat model's reply.
+    """
+    # With an image, get a description first and then ask the chat model
+    if image:
+        return describe_image(image, user_message)
+
+    # Without an image, send the message straight to the chat model
+    history.append(f"User: {user_message}")
+    full_conversation = "\n".join(history)
+    llama_result = llama_client.chat_completion(
+        messages=[{"role": "user", "content": full_conversation}],
+        max_tokens=512,
+        temperature=0.7,
+        top_p=0.95
+    )
+    reply = llama_result['choices'][0]['message']['content']
+    # Record the reply so later turns see both sides of the conversation
+    history.append(f"Assistant: {reply}")
+    return reply
 # Gradio interface
+demo = gr.Interface(
+    fn=chat_or_image,  # Function that handles both image and text input
+    inputs=[
+        gr.Image(type="filepath", label="Resim Yükle (isteğe bağlı)"),  # Image upload (label: "Upload image (optional)")
+        gr.Textbox(label="Soru Sor ya da Konuş", placeholder="Soru sor...", lines=2)  # Text input (label: "Ask a question or chat")
     ],
+    outputs="text",  # Output is returned as text
 )
+# Launch the app only when this file is run as a script
+if __name__ == "__main__":
+    demo.launch(show_error=True)  # Enable error reporting