Update app.py
app.py CHANGED
@@ -82,7 +82,11 @@ def infer(image, labels):
 
 with gr.Blocks() as demo:
     gr.Markdown("# Compare Multilingual Zero-shot Image Classification")
-    gr.Markdown("Compare the performance of SigLIP and
+    gr.Markdown("Compare the performance of SigLIP and other models on zero-shot classification in this Space.")
+    gr.Markdown("Three models are compared: CLIP-ViT, NLLB-CLIP and SigLIP. Note that SigLIP outputs are normalized for visualization purposes.")
+    gr.Markdown("NLLB-CLIP is a multilingual vision-language model that combines [NLLB](https://ai.meta.com/research/no-language-left-behind/) with [CLIP](https://openai.com/research/clip) to extend CLIP to 200+ languages.")
+    gr.Markdown("CLIP-ViT is CLIP model extended to other languages using [multilingual knowledge distillation](https://arxiv.org/abs/2004.09813).")
+    gr.Markdown("Finally, SigLIP is the state-of-the-art vision-language model released by Google. Multilingual checkpoint is pre-trained by Google.")
     with gr.Row():
         with gr.Column():
             image_input = gr.Image(type="pil")
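For context, below is a minimal sketch of how the updated Blocks layout might sit in app.py. Only the gr.Markdown lines and the Row/Column/Image structure come from the diff above; the infer body, the labels_input textbox, the run_button, and the output label are hypothetical placeholders, since the rest of the file is not shown in this commit.

# Minimal sketch of the surrounding app, assuming a standard Gradio Blocks
# layout. Everything outside the gr.Markdown lines and the Row/Column/Image
# structure is an illustrative placeholder, not the Space's actual code.
import gradio as gr

def infer(image, labels):
    # Placeholder: the real app would run CLIP-ViT, NLLB-CLIP and SigLIP on
    # the image and return per-label scores; here we return uniform scores.
    labels = [label.strip() for label in labels.split(",") if label.strip()]
    return {label: 1.0 / len(labels) for label in labels}

with gr.Blocks() as demo:
    gr.Markdown("# Compare Multilingual Zero-shot Image Classification")
    gr.Markdown("Compare the performance of SigLIP and other models on zero-shot classification in this Space.")
    with gr.Row():
        with gr.Column():
            # These two lines mirror the diff context.
            image_input = gr.Image(type="pil")
            # Hypothetical inputs/outputs wiring for illustration only.
            labels_input = gr.Textbox(label="Comma-separated candidate labels")
            run_button = gr.Button("Run")
        with gr.Column():
            output = gr.Label(label="Scores")
    run_button.click(infer, inputs=[image_input, labels_input], outputs=output)

if __name__ == "__main__":
    demo.launch()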