Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -1,13 +1,12 @@
 import gradio as gr
 from PIL import Image
 import torch
+import soundfile as sf  # for audio processing
 from transformers import AutoModelForCausalLM, AutoProcessor
 import spaces
 
-#
+# Load the model
 model_path = "microsoft/Phi-4-multimodal-instruct"
-
-# Load model and processor
 processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
     model_path,
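The new soundfile dependency is what turns an uploaded file into the (samples, sample rate) pair that the function below hands to the processor. A minimal sketch of that contract, assuming some local sample.wav exists:

import soundfile as sf

# sf.read returns the samples as a NumPy float array plus the sample rate as an int
audio, samplerate = sf.read("sample.wav")  # hypothetical local file
print(audio.shape, samplerate)  # e.g. (160000,) 16000 for 10 s of 16 kHz mono audio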
@@ -17,37 +16,44 @@ model = AutoModelForCausalLM.from_pretrained(
     _attn_implementation="eager",
 )
 
-# Define prompt structure
 user_prompt = "<|user|>"
 assistant_prompt = "<|assistant|>"
 prompt_suffix = "<|end|>"
 
-# Define inference function
 @spaces.GPU
-def analyze_artwork(file):
+def process_input(input_type, file):
     if not file:
         return "Please upload an image of an artwork."
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    if input_type == "Image":
+        # **Prompt for Artworks**
+        prompt = (
+            f"{user_prompt} You are an expert art historian and critic. Analyze the given artwork with these aspects:\n\n"
+            f"1. **General Description**: Describe the colors, composition, and subject.\n"
+            f"2. **Artistic Style**: Identify the artistic movement.\n"
+            f"3. **Historical Context**: Discuss the period and influences.\n"
+            f"4. **Symbolism & Meaning**: Interpret the messages conveyed.\n"
+            f"5. **Technical Analysis**: Examine brushwork, lighting, and composition.\n"
+            f"6. **Impact & Significance**: Explain its relevance in art history.\n\n"
+            f"Here is the artwork for analysis:\n"
+            f"<|image_1|>\n"
+            f"{prompt_suffix}{assistant_prompt}"
+        )
+        image = Image.open(file)
+        inputs = processor(text=prompt, images=image, return_tensors="pt").to(model.device)
+
+    elif input_type == "Audio":
+        prompt = (
+            f"{user_prompt} Please transcribe the given audio into text accurately.\n\n"
+            f"<|audio_1|>\n"
+            f"{prompt_suffix}{assistant_prompt}"
+        )
+        audio, samplerate = sf.read(file)
+        inputs = processor(text=prompt, audios=[(audio, samplerate)], return_tensors="pt").to(model.device)
+
+    else:
+        return "Invalid input type selected."
+
     with torch.no_grad():
         generate_ids = model.generate(
             **inputs,
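For reference, the user_prompt / prompt_suffix / assistant_prompt markers defined above assemble into Phi-4's single-turn chat template, with <|image_1|> or <|audio_1|> marking where the attached media is injected. A standalone sketch of the assembled string (the instruction text here is illustrative, not the commit's):

user_prompt = "<|user|>"
assistant_prompt = "<|assistant|>"
prompt_suffix = "<|end|>"

prompt = f"{user_prompt} Describe this artwork.\n<|image_1|>\n{prompt_suffix}{assistant_prompt}"
print(prompt)
# <|user|> Describe this artwork.
# <|image_1|>
# <|end|><|assistant|>

One small nit: the empty-file guard still returns "Please upload an image of an artwork." even though the function now accepts audio as well.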
@@ -60,38 +66,48 @@ def analyze_artwork(file):
     response = processor.batch_decode(
         generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
     )[0]
-
+
     return response
 
-
-with gr.Blocks(title="Art Analysis with Phi-4") as demo:
+with gr.Blocks(title="Art & Audio Analysis with Phi-4") as demo:
     gr.Markdown(
         """
-        #
-        Upload
-
+        # 🎨🗣️ Multimodal Art Analysis with Phi-4
+        - **Art Analysis**: Upload a piece of art and the AI will perform a detailed analysis.
+        - **Audio Transcription**: Upload your audio file and the AI will convert it to text.
+
+        🚀 Powered by Microsoft's `Phi-4-multimodal-instruct` model.
+
+        With this project, you can both analyze an uploaded image and convert a guide's spoken explanation into text while visiting an exhibition or museum.
+
        """
     )
-
+
     with gr.Row():
         with gr.Column(scale=1):
-
-
-
+            input_type = gr.Radio(
+                choices=["Image", "Audio"],
+                label="Select Input Type",
+                value="Image",
+            )
+            file_input = gr.File(
+                label="Upload File",
+                file_types=["image", "audio"],
+            )
+            submit_btn = gr.Button("Analyze", variant="primary")
+
         with gr.Column(scale=2):
             output_text = gr.Textbox(
-                label="
+                label="AI Response",
                 placeholder="The AI's response will appear here...",
-                lines=
+                lines=12,
                 interactive=False,
             )
 
-    # Connect the submit button
     submit_btn.click(
-        fn=
-        inputs=[
+        fn=process_input,
+        inputs=[input_type, file_input],
         outputs=output_text,
     )
 
-# Launch the demo
 demo.launch()
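With the click handler wired to process_input, the Space also exposes this function over Gradio's HTTP API. A hedged sketch of calling it remotely with gradio_client; the Space id, file path, and api_name below are placeholders, and the real values are listed on the Space's "Use via API" page:

from gradio_client import Client, handle_file

client = Client("<user>/<space-name>")  # placeholder Space id
result = client.predict(
    "Image",                      # value for the input_type radio
    handle_file("artwork.jpg"),   # hypothetical local file to upload
    api_name="/predict",          # assumed endpoint name
)
print(result)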