Add application file
Browse files
app.py
CHANGED
@@ -1,12 +1,7 @@
|
|
1 |
-
from __future__ import annotations
|
2 |
-
|
3 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
4 |
from PIL import Image
|
5 |
import gradio as gr
|
6 |
-
|
7 |
-
from gradio.themes.base import Base
|
8 |
-
from gradio.themes.utils import colors, fonts, sizes
|
9 |
-
import time
|
10 |
|
11 |
# Load the model and tokenizer
|
12 |
model_id = "vikhyatk/moondream2"
|
@@ -16,76 +11,24 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
16 |
)
|
17 |
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
|
18 |
|
19 |
-
def analyze_image_direct(
|
20 |
-
#
|
21 |
-
#
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
),
|
41 |
-
font_mono: fonts.Font
|
42 |
-
| str
|
43 |
-
| Iterable[fonts.Font | str] = (
|
44 |
-
fonts.GoogleFont("IBM Plex Mono"),
|
45 |
-
"ui-monospace",
|
46 |
-
"monospace",
|
47 |
-
),
|
48 |
-
):
|
49 |
-
super().__init__(
|
50 |
-
primary_hue=primary_hue,
|
51 |
-
secondary_hue=secondary_hue,
|
52 |
-
neutral_hue=neutral_hue,
|
53 |
-
spacing_size=spacing_size,
|
54 |
-
radius_size=radius_size,
|
55 |
-
text_size=text_size,
|
56 |
-
font=font,
|
57 |
-
font_mono=font_mono,
|
58 |
-
)
|
59 |
-
super().set(
|
60 |
-
body_background_fill="repeating-linear-gradient(45deg, *primary_200, *primary_200 10px, *primary_50 10px, *primary_50 20px)",
|
61 |
-
body_background_fill_dark="repeating-linear-gradient(45deg, *primary_800, *primary_800 10px, *primary_900 10px, *primary_900 20px)",
|
62 |
-
button_primary_background_fill="linear-gradient(90deg, *primary_300, *secondary_400)",
|
63 |
-
button_primary_background_fill_hover="linear-gradient(90deg, *primary_200, *secondary_300)",
|
64 |
-
button_primary_text_color="white",
|
65 |
-
button_primary_background_fill_dark="linear-gradient(90deg, *primary_600, *secondary_800)",
|
66 |
-
slider_color="*secondary_300",
|
67 |
-
slider_color_dark="*secondary_600",
|
68 |
-
block_title_text_weight="600",
|
69 |
-
block_border_width="3px",
|
70 |
-
block_shadow="*shadow_drop_lg",
|
71 |
-
button_shadow="*shadow_drop_lg",
|
72 |
-
button_large_padding="32px",
|
73 |
-
)
|
74 |
-
|
75 |
-
|
76 |
-
seafoam = Seafoam()
|
77 |
-
|
78 |
-
with gr.Blocks(theme=seafoam) as demo:
|
79 |
-
with gr.Row():
|
80 |
-
name_input = gr.Textbox(label="Name", placeholder="Enter your name here...")
|
81 |
-
with gr.Row():
|
82 |
-
count_slider = gr.Slider(label="Count", minimum=0, maximum=100, step=1, value=0)
|
83 |
-
with gr.Row():
|
84 |
-
submit_button = gr.Button("Submit")
|
85 |
-
clear_button = gr.Button("Clear")
|
86 |
-
output = gr.Textbox(label="Output")
|
87 |
-
|
88 |
-
submit_button.click(fn=analyze_image_direct, inputs=[name_input, count_slider], outputs=output)
|
89 |
-
clear_button.click(fn=lambda: ("", 0, ""), inputs=None, outputs=[name_input, count_slider, output])
|
90 |
-
|
91 |
-
demo.launch()
|
|
|
|
|
|
|
1 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
2 |
from PIL import Image
|
3 |
import gradio as gr
|
4 |
+
import numpy as np
|
|
|
|
|
|
|
5 |
|
6 |
# Load the model and tokenizer
|
7 |
model_id = "vikhyatk/moondream2"
|
|
|
11 |
)
|
12 |
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
|
13 |
|
14 |
+
def analyze_image_direct(image, question):
    """Answer a free-text question about an uploaded image.

    Args:
        image: The picture to analyse. The interface is configured to supply
            a PIL image; it is ``None`` when the user submits without
            uploading anything.
        question: The question to ask about the image.

    Returns:
        The model's answer, or a short message asking for the missing input
        when no image or no question was provided.
    """
    # Guard against an empty form: without these checks a missing image
    # would surface as an opaque exception from inside the model.
    if image is None:
        return "Please upload an image."
    if not question or not question.strip():
        return "Please enter a question about the image."

    # Encode the image first, then query the model against that encoding
    # using the module-level model and tokenizer.
    enc_image = model.encode_image(image)
    return model.answer_question(enc_image, question, tokenizer)
|
25 |
+
|
26 |
+
# Build the web UI: an image upload and a two-line question box as inputs,
# with the handler's answer shown as plain text.
iface = gr.Interface(
    fn=analyze_image_direct,
    inputs=[
        gr.Image(type="pil"),
        gr.Textbox(lines=2, placeholder="Enter your question here..."),
    ],
    outputs="text",
    title="Direct Image Question Answering",
    description="Upload an image and ask a question about it directly using the model.",
)

# Start serving the interface.
iface.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|