Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
e1a2485
1
Parent(s):
87966a5
qwen2-vl-fix
Browse files
app.py
CHANGED
|
@@ -49,11 +49,28 @@ def florence_caption(image):
|
|
| 49 |
)
|
| 50 |
return parsed_answer["<MORE_DETAILED_CAPTION>"]
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
# Qwen2-VL-2B caption function
|
| 53 |
@spaces.GPU
|
| 54 |
def qwen_caption(image):
|
| 55 |
if not isinstance(image, Image.Image):
|
| 56 |
-
image = Image.fromarray(image)
|
| 57 |
|
| 58 |
image_path = array_to_image_path(np.array(image))
|
| 59 |
|
|
@@ -65,7 +82,7 @@ def qwen_caption(image):
|
|
| 65 |
"type": "image",
|
| 66 |
"image": image_path,
|
| 67 |
},
|
| 68 |
-
{"type": "text", "text": "Describe this image in detail."},
|
| 69 |
],
|
| 70 |
}
|
| 71 |
]
|
|
@@ -518,7 +535,7 @@ def create_interface():
|
|
| 518 |
with gr.Accordion("Image and Caption", open=False):
|
| 519 |
input_image = gr.Image(label="Input Image (optional)")
|
| 520 |
caption_output = gr.Textbox(label="Generated Caption", lines=3)
|
| 521 |
-
caption_model = gr.Radio(["Florence", "
|
| 522 |
create_caption_button = gr.Button("Create Caption")
|
| 523 |
add_caption_button = gr.Button("Add Caption to Prompt")
|
| 524 |
|
|
@@ -540,9 +557,9 @@ def create_interface():
|
|
| 540 |
|
| 541 |
def create_caption(image, model):
|
| 542 |
if image is not None:
|
| 543 |
-
if model == "Florence":
|
| 544 |
return florence_caption(image)
|
| 545 |
-
elif model == "
|
| 546 |
return qwen_caption(image)
|
| 547 |
return ""
|
| 548 |
|
|
|
|
| 49 |
)
|
| 50 |
return parsed_answer["<MORE_DETAILED_CAPTION>"]
|
| 51 |
|
| 52 |
+
# Add this function to your code
|
| 53 |
+
def array_to_image_path(image_array):
|
| 54 |
+
# Convert numpy array to PIL Image
|
| 55 |
+
img = Image.fromarray(np.uint8(image_array))
|
| 56 |
+
|
| 57 |
+
# Generate a unique filename using timestamp
|
| 58 |
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 59 |
+
filename = f"image_{timestamp}.png"
|
| 60 |
+
|
| 61 |
+
# Save the image
|
| 62 |
+
img.save(filename)
|
| 63 |
+
|
| 64 |
+
# Get the full path of the saved image
|
| 65 |
+
full_path = os.path.abspath(filename)
|
| 66 |
+
|
| 67 |
+
return full_path
|
| 68 |
+
|
| 69 |
# Qwen2-VL-2B caption function
|
| 70 |
@spaces.GPU
|
| 71 |
def qwen_caption(image):
|
| 72 |
if not isinstance(image, Image.Image):
|
| 73 |
+
image = Image.fromarray(np.uint8(image))
|
| 74 |
|
| 75 |
image_path = array_to_image_path(np.array(image))
|
| 76 |
|
|
|
|
| 82 |
"type": "image",
|
| 83 |
"image": image_path,
|
| 84 |
},
|
| 85 |
+
{"type": "text", "text": "Describe this image in great detail."},
|
| 86 |
],
|
| 87 |
}
|
| 88 |
]
|
|
|
|
| 535 |
with gr.Accordion("Image and Caption", open=False):
|
| 536 |
input_image = gr.Image(label="Input Image (optional)")
|
| 537 |
caption_output = gr.Textbox(label="Generated Caption", lines=3)
|
| 538 |
+
caption_model = gr.Radio(["Florence-2", "Qwen2-VL"], label="Caption Model", value="Florence-2")
|
| 539 |
create_caption_button = gr.Button("Create Caption")
|
| 540 |
add_caption_button = gr.Button("Add Caption to Prompt")
|
| 541 |
|
|
|
|
| 557 |
|
| 558 |
def create_caption(image, model):
|
| 559 |
if image is not None:
|
| 560 |
+
if model == "Florence-2":
|
| 561 |
return florence_caption(image)
|
| 562 |
+
elif model == "Qwen2-VL":
|
| 563 |
return qwen_caption(image)
|
| 564 |
return ""
|
| 565 |
|