wondervictor committed
Commit d8936c7 · 1 Parent(s): 6a710e2

change styles

Files changed (5)
  1. .gradio/certificate.pem +31 -0
  2. app.py +141 -181
  3. assets/logo.png +0 -0
  4. assets/logo.svg +1 -0
  5. infer.py +5 -1
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
+ -----BEGIN CERTIFICATE-----
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+ -----END CERTIFICATE-----
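
Note: the added PEM appears to be the "ISRG Root X1" certificate of the Internet Security Research Group (the Let's Encrypt root), which Gradio caches under .gradio/ for share-link TLS; cache files like this are usually gitignored rather than committed. A minimal sketch to confirm what the file contains, assuming the third-party cryptography package is installed (it is not a dependency of this Space):

# Sketch: inspect the checked-in PEM. `cryptography` is an assumed extra.
from cryptography import x509

with open(".gradio/certificate.pem", "rb") as f:
    cert = x509.load_pem_x509_certificate(f.read())

print(cert.subject.rfc4514_string())  # expected: CN=ISRG Root X1,O=Internet Security Research Group,C=US
print(cert.not_valid_after)           # validity window of the root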
app.py CHANGED
@@ -9,7 +9,8 @@ import gradio as gr
  from infer import SeedVLInfer, ConversationModeI18N, ConversationModeCN
  from visualizer import draw_boxes_points_with_labels
  
- infer = SeedVLInfer(model_id=os.getenv('MODEL_ID'), api_key=os.getenv('API_KEY'))
+ infer = SeedVLInfer(model_id=os.getenv('MODEL_ID'),
+                     api_key=os.getenv('API_KEY'))
  
  label_translations = {
      "gr_chatinterface_ofl": {
@@ -59,42 +60,48 @@ label_translations = {
      }
  }
  
+ 
  def add_escape(text: str):
      return text.replace('<', '\<').replace('>', '\>')
  
+ 
  def remove_escape(text: str):
      return text.replace('\<', '<').replace('\>', '>')
  
+ 
  def plot_boxes_points_detections(image_path, message):
      detection_pattern = r'\[\s*{.*?}\s*\]'
-     detection_matches = re.finditer(detection_pattern, message, flags=re.DOTALL)
+     detection_matches = re.finditer(detection_pattern,
+                                     message,
+                                     flags=re.DOTALL)
      bboxes, categories = [], []
      for match in detection_matches:
          matched_str = match.group(0)
          detections = json.loads(matched_str)
          for detection in detections:
              cat, bbox_str = detection['category'], detection['bbox']
-             bbox_str = bbox_str.replace('<bbox>', '').replace('</bbox>', '').replace('</bbox', '')
+             bbox_str = bbox_str.replace('<bbox>',
+                                         '').replace('</bbox>',
+                                                     '').replace('</bbox', '')
              bbox = list(map(float, bbox_str.split(' ')))
              bboxes.append(bbox)
              categories.append(cat)
      if not bboxes:
          box_pattern = r'<bbox>(\d+(?:\.\d+)?)\s+(\d+(?:\.\d+)?)\s+(\d+(?:\.\d+)?)\s+(\d+(?:\.\d+)?)</bbox>'
          box_matches = re.finditer(box_pattern, message)
-         bboxes = [
-             [float(match.group(1)), float(match.group(2)),
-              float(match.group(3)), float(match.group(4))]
-             for match in box_matches
-         ]
- 
+         bboxes = [[
+             float(match.group(1)),
+             float(match.group(2)),
+             float(match.group(3)),
+             float(match.group(4))
+         ] for match in box_matches]
+ 
      points = []
      if not bboxes:
          point_pattern = r'<point>(\d+(?:\.\d+)?)\s+(\d+(?:\.\d+)?)</point>'
          point_matches = re.finditer(point_pattern, message)
-         points = [
-             [float(match.group(1)), float(match.group(2))]
-             for match in point_matches
-         ]
+         points = [[float(match.group(1)),
+                    float(match.group(2))] for match in point_matches]
  
      if not bboxes and not points:
          return
@@ -110,19 +117,26 @@ def plot_boxes_points_detections(image_path, message):
      if points.size:
          points[:, 0] *= w
          points[:, 1] *= h
-     output_image = draw_boxes_points_with_labels(image, bboxes, points, categories)
+     output_image = draw_boxes_points_with_labels(image, bboxes, points,
+                                                  categories)
      return output_image
  
- def general_chat(inputs: dict, gr_history: list, infer_history: list,
-                  if_thinking: bool, temperature: float, online: bool = False):
+ 
+ def general_chat(inputs: dict,
+                  gr_history: list,
+                  infer_history: list,
+                  if_thinking: bool,
+                  temperature: float,
+                  online: bool = False):
      if 'text' in inputs:
          inputs['text'] = remove_escape(inputs['text'])
      mode = ConversationModeI18N.D if if_thinking else ConversationModeI18N.G
-     for response_text, infer_history, finished in infer(inputs=inputs,
-                                                         history=infer_history,
-                                                         mode=mode,
-                                                         temperature=temperature,
-                                                         online=online):
+     for response_text, infer_history, finished in infer(
+             inputs=inputs,
+             history=infer_history,
+             mode=mode,
+             temperature=temperature,
+             online=online):
          if if_thinking:
              reasoning_text, response_text = response_text.split('</think>')
              reasoning_text = reasoning_text.lstrip('<think>')
@@ -141,13 +155,16 @@ def general_chat(inputs: dict, gr_history: list, infer_history: list,
              "role": "assistant",
              "content": add_escape(response_text)
          }]
-         if finished and len(inputs.get('files', [])) == 1 and not inputs['files'][0].endswith('.mp4'):
+         if finished and len(inputs.get(
+                 'files', [])) == 1 and not inputs['files'][0].endswith('.mp4'):
              image_path = inputs['files'][0]
              response_text = infer_history[-1]['content']
              try:
                  if if_thinking:
-                     reasoning_text, response_text = response_text.split('</think>')
-                 output_image = plot_boxes_points_detections(image_path, response_text)
+                     reasoning_text, response_text = response_text.split(
+                         '</think>')
+                 output_image = plot_boxes_points_detections(
+                     image_path, response_text)
                  if output_image is not None:
                      response_message.append({
                          "role": "assistant",
@@ -157,6 +174,7 @@ def general_chat(inputs: dict, gr_history: list, infer_history: list,
              print(e)
          yield response_message, infer_history
  
+ 
  def online_record_chat(text: str, gr_history: list, gr_webcam_images: list,
                         gr_counter: int, infer_history: list, if_thinking: bool,
                         temperature: float):
@@ -166,74 +184,28 @@ def online_record_chat(text: str, gr_history: list, gr_webcam_images: list,
      inputs = {'text': text, 'files': [webp for webp, _ in gr_webcam_images]}
      yield f'received {len(gr_webcam_images)} new frames, processing...', gr_counter + len(
          gr_webcam_images), infer_history
-     for response_message, infer_history in general_chat(
-             inputs, gr_history, infer_history, if_thinking, temperature, online=True):
+     for response_message, infer_history in general_chat(inputs,
+                                                         gr_history,
+                                                         infer_history,
+                                                         if_thinking,
+                                                         temperature,
+                                                         online=True):
          yield response_message, gr.skip(), infer_history
  
- with gr.Blocks() as demo:
+ 
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
      with gr.Row():
          with gr.Column():
-             gr_title = gr.Markdown('# Seed1.5-VL')
-             with gr.Row():
-                 gr.Markdown(
-                     """
-                     <div style="display:flex; flex-direction:column; gap:10px;">
-                     <a
-                         href="https://github.com/ByteDance-Seed/Seed1.5-VL"
-                         target="_blank"
-                         style="
-                             display: inline-flex;
-                             align-items: center;
-                             gap: 8px;
-                             white-space: nowrap;
-                             text-decoration: none;
-                         "
-                     >
-                         <img
-                             src="https://cdn.jsdelivr.net/gh/devicons/devicon/icons/github/github-original.svg"
-                             alt="GitHub"
-                             width="24"
-                         >
-                         Seed1.5-VL Cookbook
-                     </a>
-                     </div>
-                     """
-                 )
-                 gr.Markdown(
-                     """
-                     <div style="display:flex; flex-direction:column; gap:10px;">
-                     <a
-                         href="https://huggingface.co/papers/2505.07062"
-                         target="_blank"
-                         style="
-                             display: inline-flex;
-                             align-items: center;
-                             gap: 8px;
-                             white-space: nowrap;
-                             text-decoration: none;
-                         "
-                     >
-                         <img
-                             src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg"
-                             alt="Paper"
-                             width="24"
-                         >
-                         Seed1.5-VL Paper
-                     </a>
-                     </div>
-                     """,
-                 )
-                 gr.Markdown('')
-                 gr.Markdown('')
-                 gr.Markdown('')
- 
-             gr_lang_selector = gr.Dropdown(choices=["English", "中文"],
-                                            value="English",
-                                            label="🌐 English Interface/中文界面",
-                                            interactive=True,
-                                            min_width=400,
-                                            scale=0)
+             gr_title = gr.Markdown('<h1>Seed1.5-VL</h1>')
+             gr_desc = gr.Markdown('<h3>Advancing Multimodal Understanding and Reasoning.</h3>')
  
+             gr_lang_selector = gr.Dropdown(choices=["English", "中文"],
+                                            value="English",
+                                            label="🌐 English Interface/中文界面",
+                                            interactive=True,
+                                            min_width=400,
+                                            scale=0)
+ 
      with gr.Tabs():
          with gr.Tab("Offline") as gr_tab_ofl:
              gr_infer_history = gr.State([])
@@ -262,15 +234,16 @@ with gr.Blocks() as demo:
                  ],
                  additional_outputs=[gr_infer_history],
              )
+ 
              def add_escape_fn(inputs: dict):
                  if inputs and 'text' in inputs:
                      inputs['text'] = add_escape(inputs['text'])
                  return inputs
+ 
              gr_chatinterface_ofl.textbox.submit(
                  fn=add_escape_fn,
                  inputs=[gr_chatinterface_ofl.saved_input],
-                 outputs=[gr_chatinterface_ofl.saved_input]
-             )
+                 outputs=[gr_chatinterface_ofl.saved_input])
              gr.on(triggers=[gr_chatinterface_ofl.chatbot.clear],
                    fn=lambda: [],
                    outputs=[gr_infer_history])
@@ -280,8 +253,8 @@ with gr.Blocks() as demo:
                  label=label_translations['gr_thinking']['English'],
              )
              gr_thinking_ofl.change(lambda x: x,
-                                    inputs=gr_thinking_ofl,
-                                    outputs=gr_thinking_hidden)
+                                    inputs=gr_thinking_ofl,
+                                    outputs=gr_thinking_hidden)
              gr_temperature_ofl = gr.Slider(
                  minimum=0.0,
                  maximum=2.0,
@@ -290,101 +263,84 @@ with gr.Blocks() as demo:
                  label=label_translations['gr_temperature']['English'],
                  interactive=True)
              gr_temperature_ofl.change(lambda x: x,
-                                       inputs=gr_temperature_ofl,
-                                       outputs=gr_temperature_hidden)
-             gr_clear_button_ofl = gr.Button(value=label_translations['gr_clear_button']['English'])
+                                       inputs=gr_temperature_ofl,
+                                       outputs=gr_temperature_hidden)
+             gr_clear_button_ofl = gr.Button(
+                 value=label_translations['gr_clear_button']['English'])
+ 
              def clear_history_fn():
                  return None, [], [], [], []
+ 
              gr_clear_button_ofl.click(
-                 fn=clear_history_fn,
+                 fn=clear_history_fn,
                  outputs=[
-                     gr_chatinterface_ofl.conversation_id,
-                     gr_chatinterface_ofl.saved_conversations,
+                     gr_chatinterface_ofl.conversation_id,
+                     gr_chatinterface_ofl.saved_conversations,
                      gr_chatinterface_ofl.chatbot,
-                     gr_chatinterface_ofl.chatbot_state,
-                     gr_infer_history
-                 ]
-             )
+                     gr_chatinterface_ofl.chatbot_state, gr_infer_history
+                 ])
              with gr.Column(visible=True) as gr_examples_en:
                  gr.Examples(
-                     label='7 Examples: text, image, video, multiple images/videos, visual puzzle, points grounding, open-vocabulary detection.',
-                     examples=[
-                         {
-                             "text": "Who are you?",
-                             "files": []
-                         },
-                         {
-                             "text": "Introduce this.",
-                             "files": ["examples/bancopy.jpg"]
-                         },
-                         {
-                             "text":
-                             """Find Curry's "Good Night" celebration time.""",
-                             "files":
-                             ["examples/I7pTpMjqNRM_1080p_small.mp4"]
-                         },
-                         {
-                             "text":
-                             "Share your feelings.",
-                             "files": [
-                                 "examples/newyork.jpg",
-                                 "examples/beijing.jpg"
-                             ]
-                         },
-                         {
-                             "text": "Look and answer.",
-                             "files": ["examples/puzzle.jpg"]
-                         },
-                         {
-                             "text": "Please point out all the hats on people's heads in the image, output concatenated point coordinates like <point>x y</point><point>x y</point>",
-                             "files": ["examples/000000001000.jpeg"]
-                         },
-                         {
-                             "text": """Please detect all plate, photo, kid, cup in the image, and output all objects in the JSON format, which is a list of dict like [{"category": category, "bbox": "<bbox>x1 y1 x2 y2</bbox>"}, {"category": category, "bbox": "<bbox>x1 y1 x2 y2</bbox>"}]""",
-                             "files": ["examples/000000018380.jpeg"]
-                         }
-                     ],
+                     label=
+                     '7 Examples: text, image, video, multiple images/videos, visual puzzle, points grounding, open-vocabulary detection.',
+                     examples=[{
+                         "text": "Who are you?",
+                         "files": []
+                     }, {
+                         "text": "Introduce this.",
+                         "files": ["examples/bancopy.jpg"]
+                     }, {
+                         "text":
+                         """Find Curry's "Good Night" celebration time.""",
+                         "files": ["examples/I7pTpMjqNRM_1080p_small.mp4"]
+                     }, {
+                         "text":
+                         "Share your feelings.",
+                         "files":
+                         ["examples/newyork.jpg", "examples/beijing.jpg"]
+                     }, {
+                         "text": "Look and answer.",
+                         "files": ["examples/puzzle.jpg"]
+                     }, {
+                         "text":
+                         "Please point out all the hats on people's heads in the image, output concatenated point coordinates like <point>x y</point><point>x y</point>",
+                         "files": ["examples/000000001000.jpeg"]
+                     }, {
+                         "text":
+                         """Please detect all plate, photo, kid, cup in the image, and output all objects in the JSON format, which is a list of dict like [{"category": category, "bbox": "<bbox>x1 y1 x2 y2</bbox>"}, {"category": category, "bbox": "<bbox>x1 y1 x2 y2</bbox>"}]""",
+                         "files": ["examples/000000018380.jpeg"]
+                     }],
                      inputs=[gr_chatinterface_ofl.textbox],
                  )
              with gr.Column(visible=False) as gr_examples_cn:
                  gr.Examples(
                      label='七个示例:文本,图像,视频,多个图像/视频,视觉解谜,坐标定位,开放式物体检测。',
-                     examples=[
-                         {
-                             "text": "你是谁?",
-                             "files": []
-                         },
-                         {
-                             "text": "介绍一下。",
-                             "files": ["examples/bancopy.jpg"]
-                         },
-                         {
-                             "text":
-                             "找到库里的“晚安”庆祝时间段。",
-                             "files":
-                             ["examples/I7pTpMjqNRM_1080p_small.mp4"]
-                         },
-                         {
-                             "text":
-                             "你有什么感想?",
-                             "files": [
-                                 "examples/newyork.jpg",
-                                 "examples/beijing.jpg"
-                             ]
-                         },
-                         {
-                             "text": "看图回答。",
-                             "files": ["examples/puzzle.jpg"]
-                         },
-                         {
-                             "text": "请点出图像中所有戴在头上的帽子, 输出串联的点坐标<point>x y</point><point>x y</point>",
-                             "files": ["examples/000000001000.jpeg"]
-                         },
-                         {
-                             "text": """请检测图像中所有的盘子、照片、小孩和杯子。请以JSON格式输出一个由字典组成的列表,就像:[{"category": 类别, "bbox": "<bbox>x1 y1 x2 y2</bbox>"}, {"category": 类别, "bbox": "<bbox>x1 y1 x2 y2</bbox>"}]""",
-                             "files": ["examples/000000018380.jpeg"]
-                         }
-                     ],
+                     examples=[{
+                         "text": "你是谁?",
+                         "files": []
+                     }, {
+                         "text": "介绍一下。",
+                         "files": ["examples/bancopy.jpg"]
+                     }, {
+                         "text": "找到库里的“晚安”庆祝时间段。",
+                         "files": ["examples/I7pTpMjqNRM_1080p_small.mp4"]
+                     }, {
+                         "text":
+                         "你有什么感想?",
+                         "files":
+                         ["examples/newyork.jpg", "examples/beijing.jpg"]
+                     }, {
+                         "text": "看图回答。",
+                         "files": ["examples/puzzle.jpg"]
+                     }, {
+                         "text":
+                         "请点出图像中所有戴在头上的帽子, 输出串联的点坐标<point>x y</point><point>x y</point>",
+                         "files": ["examples/000000001000.jpeg"]
+                     }, {
+                         "text":
+                         """请检测图像中所有的盘子、照片、小孩和杯子。请以JSON格式输出一个由字典组成的列表,就像:[{"category": 类别, "bbox": "<bbox>x1 y1 x2 y2</bbox>"}, {"category": 类别, "bbox": "<bbox>x1 y1 x2 y2</bbox>"}]""",
+                         "files": ["examples/000000018380.jpeg"]
+                     }],
                      inputs=[gr_chatinterface_ofl.textbox],
                  )
          with gr.Tab("Online") as gr_tab_ol:
@@ -473,19 +429,23 @@ with gr.Blocks() as demo:
                      lambda x: x,
                      inputs=gr_temperature_ol,
                      outputs=gr_temperature_hidden)
-                 gr_clear_button_ol = gr.Button(value=label_translations['gr_clear_button']['English'])
+                 gr_clear_button_ol = gr.Button(
+                     value=label_translations['gr_clear_button']
+                     ['English'])
+ 
                  def clear_history_fn():
                      return None, [], [], [], []
+ 
                  gr_clear_button_ol.click(
-                     fn=clear_history_fn,
+                     fn=clear_history_fn,
                      outputs=[
-                         gr_chatinterface_ol.conversation_id,
-                         gr_chatinterface_ol.saved_conversations,
+                         gr_chatinterface_ol.conversation_id,
+                         gr_chatinterface_ol.
+                         saved_conversations,
                          gr_chatinterface_ol.chatbot,
-                         gr_chatinterface_ol.chatbot_state,
+                         gr_chatinterface_ol.chatbot_state,
                          gr_infer_history_ol
-                     ]
-                 )
+                     ])
  
      def update_lang(lang: str):
          return (
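
The app.py diff is essentially a yapf-style reflow plus a simplified header (the logo/link Markdown is replaced by an h1/h3 banner and a Soft theme); in particular, plot_boxes_points_detections keeps its fallback order unchanged: a JSON detection list first, then bare <bbox> tags, then <point> tags. A self-contained sketch of that parsing cascade (the two tag regexes are copied from the diff; the parse helper and sample messages are illustrative, not code from this commit):

import json
import re

# Regexes as they appear in app.py above.
BOX_PATTERN = r'<bbox>(\d+(?:\.\d+)?)\s+(\d+(?:\.\d+)?)\s+(\d+(?:\.\d+)?)\s+(\d+(?:\.\d+)?)</bbox>'
POINT_PATTERN = r'<point>(\d+(?:\.\d+)?)\s+(\d+(?:\.\d+)?)</point>'


def parse(message: str):
    # 1) Prefer a JSON list of {"category": ..., "bbox": "<bbox>...</bbox>"} dicts.
    for m in re.finditer(r'\[\s*{.*?}\s*\]', message, flags=re.DOTALL):
        out = []
        for det in json.loads(m.group(0)):
            coords = det['bbox'].replace('<bbox>', '').replace('</bbox>', '')
            out.append((det['category'], [float(v) for v in coords.split()]))
        return out
    # 2) Fall back to bare <bbox> tags, then to <point> tags.
    boxes = [[float(g) for g in m.groups()] for m in re.finditer(BOX_PATTERN, message)]
    if boxes:
        return boxes
    return [[float(g) for g in m.groups()] for m in re.finditer(POINT_PATTERN, message)]


print(parse('[{"category": "cup", "bbox": "<bbox>0.1 0.2 0.3 0.4</bbox>"}]'))
print(parse('<point>120 48</point><point>300 77</point>'))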
assets/logo.png ADDED
assets/logo.svg ADDED
infer.py CHANGED
@@ -7,7 +7,11 @@ import base64
  import requests
  
  import torch
- import decord
+ try:
+     import decord
+ except ImportError:
+     print("Please install decord first.")
+     pass
  import numpy as np
  from PIL import Image, ImageSequence
  from torchvision.io import read_image, encode_jpeg
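
The infer.py change makes decord optional so the Space can start without it, but the bare print at import time means a missing dependency only resurfaces later, when a video is actually decoded. A sketch of a stricter variant of the same guard (the _require_decord and read_video_frames helpers are hypothetical, not part of this commit):

# Same optional-import pattern, but failing loudly at first use.
try:
    import decord
except ImportError:
    decord = None


def _require_decord():
    if decord is None:
        raise ImportError("video inputs need decord: pip install decord")
    return decord


def read_video_frames(path: str, num_frames: int = 8):
    vr = _require_decord().VideoReader(path)      # decord.VideoReader
    step = max(len(vr) // num_frames, 1)
    indices = list(range(0, len(vr), step))[:num_frames]
    return vr.get_batch(indices)                  # batch of decoded frames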