VideoLLaMA3-Image

Running on Zero

App Files Community

Update image demo

by CircleRadon - opened Jan 24

base: refs/heads/main ← from: refs/pr/1

Discussion Files changed

-11

Files changed (3) hide show

app.py +8 -8
examples/cake.jpg +0 -0
examples/cake.png +0 -3

app.py CHANGED Viewed

@@ -29,13 +29,13 @@ HEADER = ("""
 device = "cuda"
 model = AutoModelForCausalLM.from_pretrained(
-    "DAMO-NLP-SG/VideoLLaMA3-7B",
     trust_remote_code=True,
     torch_dtype=torch.bfloat16,
     attn_implementation="flash_attention_2",
 )
 model.to(device)
-processor = AutoProcessor.from_pretrained("DAMO-NLP-SG/VideoLLaMA3-7B", trust_remote_code=True)
 example_dir = "./examples"
@@ -143,7 +143,7 @@ with gr.Blocks() as interface:
             with gr.Tab(label="Input"):
                 with gr.Row():
-                    input_video = gr.Video(sources=["upload"], label="Upload Video")
                     input_image = gr.Image(sources=["upload"], type="filepath", label="Upload Image")
                 input_text = gr.Textbox(label="Input Text", placeholder="Type your message here and press enter to submit")
@@ -151,10 +151,10 @@ with gr.Blocks() as interface:
                 submit_button = gr.Button("Generate")
                 gr.Examples(examples=[
-                    [f"examples/bear.mp4", "What is unusual in the video?"],
-                    [f"examples/dog.mp4", "Please describe the video in detail."],
-                    [f"examples/running.mp4", "Who won the competition?"],
-                ], inputs=[input_video, input_text], label="Video examples")
             with gr.Tab(label="Configure"):
                 with gr.Accordion("Generation Config", open=True):
@@ -167,7 +167,7 @@ with gr.Blocks() as interface:
                     fps = gr.Slider(minimum=0.0, maximum=10.0, value=1, label="FPS")
                     max_frames = gr.Slider(minimum=0, maximum=256, value=180, step=1, label="Max Frames")
-    input_video.change(_on_video_upload, [chatbot, input_video], [chatbot, input_video])
     input_image.change(_on_image_upload, [chatbot, input_image], [chatbot, input_image])
     input_text.submit(_on_text_submit, [chatbot, input_text], [chatbot, input_text])
     submit_button.click(

 device = "cuda"
 model = AutoModelForCausalLM.from_pretrained(
+    "DAMO-NLP-SG/VideoLLaMA3-7B-Image",
     trust_remote_code=True,
     torch_dtype=torch.bfloat16,
     attn_implementation="flash_attention_2",
 )
 model.to(device)
+processor = AutoProcessor.from_pretrained("DAMO-NLP-SG/VideoLLaMA3-7B-Image", trust_remote_code=True)
 example_dir = "./examples"
             with gr.Tab(label="Input"):
                 with gr.Row():
+                    # input_video = gr.Video(sources=["upload"], label="Upload Video")
                     input_image = gr.Image(sources=["upload"], type="filepath", label="Upload Image")
                 input_text = gr.Textbox(label="Input Text", placeholder="Type your message here and press enter to submit")
                 submit_button = gr.Button("Generate")
                 gr.Examples(examples=[
+                    [f"examples/cake.jpg", "What are the words on the cake?"],
+                    [f"examples/chart.jpg", "What do you think of this stock? Is it worth holding? Why?"],
+                    [f"examples/performance.png", "Which model do you think is the strongest? Why?"],
+                ], inputs=[input_image, input_text], label="Image examples")
             with gr.Tab(label="Configure"):
                 with gr.Accordion("Generation Config", open=True):
                     fps = gr.Slider(minimum=0.0, maximum=10.0, value=1, label="FPS")
                     max_frames = gr.Slider(minimum=0, maximum=256, value=180, step=1, label="Max Frames")
+    # input_video.change(_on_video_upload, [chatbot, input_video], [chatbot, input_video])
     input_image.change(_on_image_upload, [chatbot, input_image], [chatbot, input_image])
     input_text.submit(_on_text_submit, [chatbot, input_text], [chatbot, input_text])
     submit_button.click(

examples/cake.jpg ADDED Viewed

examples/cake.png DELETED Viewed

Git LFS Details

SHA256: 725702ea726865471b37fa5849c29cd506fd866d813a1d3e51a2f84c73df7baa
Pointer size: 132 Bytes
Size of remote file: 3.74 MB