Files changed (3) hide show
  1. app.py +8 -8
  2. examples/cake.jpg +0 -0
  3. examples/cake.png +0 -3
app.py CHANGED
@@ -29,13 +29,13 @@ HEADER = ("""
29
 
30
  device = "cuda"
31
  model = AutoModelForCausalLM.from_pretrained(
32
- "DAMO-NLP-SG/VideoLLaMA3-7B",
33
  trust_remote_code=True,
34
  torch_dtype=torch.bfloat16,
35
  attn_implementation="flash_attention_2",
36
  )
37
  model.to(device)
38
- processor = AutoProcessor.from_pretrained("DAMO-NLP-SG/VideoLLaMA3-7B", trust_remote_code=True)
39
 
40
 
41
  example_dir = "./examples"
@@ -143,7 +143,7 @@ with gr.Blocks() as interface:
143
  with gr.Tab(label="Input"):
144
 
145
  with gr.Row():
146
- input_video = gr.Video(sources=["upload"], label="Upload Video")
147
  input_image = gr.Image(sources=["upload"], type="filepath", label="Upload Image")
148
 
149
  input_text = gr.Textbox(label="Input Text", placeholder="Type your message here and press enter to submit")
@@ -151,10 +151,10 @@ with gr.Blocks() as interface:
151
  submit_button = gr.Button("Generate")
152
 
153
  gr.Examples(examples=[
154
- [f"examples/bear.mp4", "What is unusual in the video?"],
155
- [f"examples/dog.mp4", "Please describe the video in detail."],
156
- [f"examples/running.mp4", "Who won the competition?"],
157
- ], inputs=[input_video, input_text], label="Video examples")
158
 
159
  with gr.Tab(label="Configure"):
160
  with gr.Accordion("Generation Config", open=True):
@@ -167,7 +167,7 @@ with gr.Blocks() as interface:
167
  fps = gr.Slider(minimum=0.0, maximum=10.0, value=1, label="FPS")
168
  max_frames = gr.Slider(minimum=0, maximum=256, value=180, step=1, label="Max Frames")
169
 
170
- input_video.change(_on_video_upload, [chatbot, input_video], [chatbot, input_video])
171
  input_image.change(_on_image_upload, [chatbot, input_image], [chatbot, input_image])
172
  input_text.submit(_on_text_submit, [chatbot, input_text], [chatbot, input_text])
173
  submit_button.click(
 
29
 
30
  device = "cuda"
31
  model = AutoModelForCausalLM.from_pretrained(
32
+ "DAMO-NLP-SG/VideoLLaMA3-7B-Image",
33
  trust_remote_code=True,
34
  torch_dtype=torch.bfloat16,
35
  attn_implementation="flash_attention_2",
36
  )
37
  model.to(device)
38
+ processor = AutoProcessor.from_pretrained("DAMO-NLP-SG/VideoLLaMA3-7B-Image", trust_remote_code=True)
39
 
40
 
41
  example_dir = "./examples"
 
143
  with gr.Tab(label="Input"):
144
 
145
  with gr.Row():
146
+ # input_video = gr.Video(sources=["upload"], label="Upload Video")
147
  input_image = gr.Image(sources=["upload"], type="filepath", label="Upload Image")
148
 
149
  input_text = gr.Textbox(label="Input Text", placeholder="Type your message here and press enter to submit")
 
151
  submit_button = gr.Button("Generate")
152
 
153
  gr.Examples(examples=[
154
+ [f"examples/cake.jpg", "What are the words on the cake?"],
155
+ [f"examples/chart.jpg", "What do you think of this stock? Is it worth holding? Why?"],
156
+ [f"examples/performance.png", "Which model do you think is the strongest? Why?"],
157
+ ], inputs=[input_image, input_text], label="Image examples")
158
 
159
  with gr.Tab(label="Configure"):
160
  with gr.Accordion("Generation Config", open=True):
 
167
  fps = gr.Slider(minimum=0.0, maximum=10.0, value=1, label="FPS")
168
  max_frames = gr.Slider(minimum=0, maximum=256, value=180, step=1, label="Max Frames")
169
 
170
+ # input_video.change(_on_video_upload, [chatbot, input_video], [chatbot, input_video])
171
  input_image.change(_on_image_upload, [chatbot, input_image], [chatbot, input_image])
172
  input_text.submit(_on_text_submit, [chatbot, input_text], [chatbot, input_text])
173
  submit_button.click(
examples/cake.jpg ADDED
examples/cake.png DELETED

Git LFS Details

  • SHA256: 725702ea726865471b37fa5849c29cd506fd866d813a1d3e51a2f84c73df7baa
  • Pointer size: 132 Bytes
  • Size of remote file: 3.74 MB