gizemsarsinlar committed on
Commit
2cf5592
·
verified ·
1 Parent(s): 8344cc1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -43
app.py CHANGED
@@ -1,13 +1,12 @@
1
  import gradio as gr
2
  from PIL import Image
3
  import torch
 
4
  from transformers import AutoModelForCausalLM, AutoProcessor
5
  import spaces
6
 
7
- # Define model path
8
  model_path = "microsoft/Phi-4-multimodal-instruct"
9
-
10
- # Load model and processor
11
  processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
12
  model = AutoModelForCausalLM.from_pretrained(
13
  model_path,
@@ -17,37 +16,44 @@ model = AutoModelForCausalLM.from_pretrained(
17
  _attn_implementation="eager",
18
  )
19
 
20
- # Define prompt structure
21
  user_prompt = "<|user|>"
22
  assistant_prompt = "<|assistant|>"
23
  prompt_suffix = "<|end|>"
24
 
25
- # Define inference function
26
  @spaces.GPU
27
- def analyze_artwork(file):
28
  if not file:
29
  return "Please upload an image of an artwork."
30
-
31
- # Custom prompt for artwork analysis
32
- prompt = (
33
- f"{user_prompt} You are an expert art historian and critic. Your task is to analyze the given artwork."
34
- f" Provide a structured and insightful analysis based on the following points:\n\n"
35
- f"1. **General Description**: Describe the colors, composition, and subject.\n"
36
- f"2. **Artistic Style**: Identify the artistic movement (e.g., Impressionism, Surrealism).\n"
37
- f"3. **Historical Context**: Discuss the period (e.g., Renaissance, Baroque) and cultural influences that influenced the artwork.\n"
38
- f"4. **Symbolism & Meaning**: Interpret the messages and emotions conveyed.\n"
39
- f"5. **Technical Analysis**: Examine brushwork, lighting, and composition.\n"
40
- f"6. **Impact & Significance**: Explain the artwork’s relevance in the art world.\n\n"
41
- f"Here is the artwork for analysis:\n"
42
- f"<|image_1|>\n"
43
- f"{prompt_suffix}{assistant_prompt}"
44
- )
45
-
46
- # Open image from uploaded file
47
- image = Image.open(file)
48
- inputs = processor(text=prompt, images=image, return_tensors="pt").to(model.device)
49
-
50
- # Generate response
 
 
 
 
 
 
 
 
 
51
  with torch.no_grad():
52
  generate_ids = model.generate(
53
  **inputs,
@@ -60,38 +66,48 @@ def analyze_artwork(file):
60
  response = processor.batch_decode(
61
  generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
62
  )[0]
63
-
64
  return response
65
 
66
- # Gradio interface
67
- with gr.Blocks(title="Art Analysis with Phi-4") as demo:
68
  gr.Markdown(
69
  """
70
- # 🎨 Art Analysis with Phi-4
71
- Upload an **image** of an artwork, and get a detailed analysis by an AI art historian!
72
- Built with the microsoft/Phi-4-multimodal-instruct model.
 
 
 
 
 
73
  """
74
  )
75
-
76
  with gr.Row():
77
  with gr.Column(scale=1):
78
- image_input = gr.Image(label="Upload Artwork Image", type="filepath")
79
- submit_btn = gr.Button("Analyze Artwork", variant="primary")
80
-
 
 
 
 
 
 
 
 
81
  with gr.Column(scale=2):
82
  output_text = gr.Textbox(
83
- label="Art Analysis Result",
84
  placeholder="The AI's response will appear here...",
85
- lines=22,
86
  interactive=False,
87
  )
88
 
89
- # Connect the submit button
90
  submit_btn.click(
91
- fn=analyze_artwork,
92
- inputs=[image_input],
93
  outputs=output_text,
94
  )
95
 
96
- # Launch the demo
97
  demo.launch()
 
1
  import gradio as gr
2
  from PIL import Image
3
  import torch
4
+ import soundfile as sf # For audio processing
5
  from transformers import AutoModelForCausalLM, AutoProcessor
6
  import spaces
7
 
8
+ # Load the model
9
  model_path = "microsoft/Phi-4-multimodal-instruct"
 
 
10
  processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
11
  model = AutoModelForCausalLM.from_pretrained(
12
  model_path,
 
16
  _attn_implementation="eager",
17
  )
18
 
 
19
  user_prompt = "<|user|>"
20
  assistant_prompt = "<|assistant|>"
21
  prompt_suffix = "<|end|>"
22
 
 
23
  @spaces.GPU
24
+ def process_input(input_type, file):
25
  if not file:
26
  return "Please upload an image of an artwork."
27
+
28
+ if input_type == "Image":
29
+ # **Prompt for Artworks**
30
+ prompt = (
31
+ f"{user_prompt} You are an expert art historian and critic. Analyze the given artwork with these aspects:\n\n"
32
+ f"1. **General Description**: Describe the colors, composition, and subject.\n"
33
+ f"2. **Artistic Style**: Identify the artistic movement.\n"
34
+ f"3. **Historical Context**: Discuss the period and influences.\n"
35
+ f"4. **Symbolism & Meaning**: Interpret the messages conveyed.\n"
36
+ f"5. **Technical Analysis**: Examine brushwork, lighting, and composition.\n"
37
+ f"6. **Impact & Significance**: Explain its relevance in art history.\n\n"
38
+ f"Here is the artwork for analysis:\n"
39
+ f"<|image_1|>\n"
40
+ f"{prompt_suffix}{assistant_prompt}"
41
+ )
42
+ image = Image.open(file)
43
+ inputs = processor(text=prompt, images=image, return_tensors="pt").to(model.device)
44
+
45
+ elif input_type == "Audio":
46
+ prompt = (
47
+ f"{user_prompt} Please transcribe the given audio into text accurately.\n\n"
48
+ f"<|audio_1|>\n"
49
+ f"{prompt_suffix}{assistant_prompt}"
50
+ )
51
+ audio, samplerate = sf.read(file)
52
+ inputs = processor(text=prompt, audios=[(audio, samplerate)], return_tensors="pt").to(model.device)
53
+
54
+ else:
55
+ return "Geçersiz giriş türü seçildi."
56
+
57
  with torch.no_grad():
58
  generate_ids = model.generate(
59
  **inputs,
 
66
  response = processor.batch_decode(
67
  generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
68
  )[0]
69
+
70
  return response
71
 
72
+ with gr.Blocks(title="Art & Audio Analysis with Phi-4") as demo:
 
73
  gr.Markdown(
74
  """
75
+ # 🎨🗣️ Multimodal Art Analysis with Phi-4
76
+ - **Art Analysis**: Upload a piece of art, AI will perform a detailed analysis.
77
+ - **Audio Transcription**: Upload your audio file, AI will convert it to text.
78
+
79
+ 🚀 Powered by Microsoft's `Phi-4-multimodal-instruct` model.
80
+
81
+ With this project, you can both analyze the uploaded image and convert the guide's verbal explanation into text while visiting any exhibition or museum.
82
+
83
  """
84
  )
85
+
86
  with gr.Row():
87
  with gr.Column(scale=1):
88
+ input_type = gr.Radio(
89
+ choices=["Image", "Audio"],
90
+ label="Select Input Type",
91
+ value="Image",
92
+ )
93
+ file_input = gr.File(
94
+ label="Upload File",
95
+ file_types=["image", "audio"],
96
+ )
97
+ submit_btn = gr.Button("Analyze", variant="primary")
98
+
99
  with gr.Column(scale=2):
100
  output_text = gr.Textbox(
101
+ label="AI Response",
102
  placeholder="The AI's response will appear here...",
103
+ lines=12,
104
  interactive=False,
105
  )
106
 
 
107
  submit_btn.click(
108
+ fn=process_input,
109
+ inputs=[input_type, file_input],
110
  outputs=output_text,
111
  )
112
 
 
113
  demo.launch()