- README.md +41 -0
- assets/banner.png +3 -0
README.md
CHANGED
@@ -18,8 +18,49 @@ library_name: transformers
|
|
18 |
pipeline_tag: video-text-to-text
|
19 |
model_name: ViCA-7B
|
20 |
base_model: lmms-lab/LLaVA-Video-7B-Qwen2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
---
|
22 |
|
|
|
|
|
|
|
|
|
23 |
# ViCA-7B: Visuospatial Cognitive Assistant
|
24 |
|
25 |
## Overview
|
|
|
18 |
pipeline_tag: video-text-to-text
|
19 |
model_name: ViCA-7B
|
20 |
base_model: lmms-lab/LLaVA-Video-7B-Qwen2
|
21 |
+
model-index:
|
22 |
+
- name: ViCA-7B
|
23 |
+
results:
|
24 |
+
- task:
|
25 |
+
type: visual-question-answering
|
26 |
+
dataset:
|
27 |
+
name: VSI-Bench
|
28 |
+
type: vsi-bench
|
29 |
+
metrics:
|
30 |
+
- type: score
|
31 |
+
value: 60.56
|
32 |
+
name: Average
|
33 |
+
verified: false
|
34 |
+
- type: MRA
|
35 |
+
value: 68.81
|
36 |
+
name: Object Count
|
37 |
+
- type: MRA
|
38 |
+
value: 57.01
|
39 |
+
name: Absolute Distance
|
40 |
+
- type: MRA
|
41 |
+
value: 79.17
|
42 |
+
name: Object Size
|
43 |
+
- type: MRA
|
44 |
+
value: 75.14
|
45 |
+
name: Room Size
|
46 |
+
- type: accuracy
|
47 |
+
value: 58.45
|
48 |
+
name: Relative Distance
|
49 |
+
- type: accuracy
|
50 |
+
value: 42.56
|
51 |
+
name: Relative Direction
|
52 |
+
- type: accuracy
|
53 |
+
value: 34.54
|
54 |
+
name: Route Plan
|
55 |
+
- type: accuracy
|
56 |
+
value: 68.77
|
57 |
+
name: Appearance Order
|
58 |
---
|
59 |
|
60 |
+
<div align="center">
|
61 |
+
<img src="assets/banner.png" alt="ViCA Banner"/>
|
62 |
+
</div>
|
63 |
+
|
64 |
# ViCA-7B: Visuospatial Cognitive Assistant
|
65 |
|
66 |
## Overview
|
assets/banner.png
ADDED
![ViCA Banner](assets/banner.png) |
Git LFS Details
|