xj committed
Commit a2c1aea
Parents: ea05f46 0bebf2f

Merge branch 'main' of https://huggingface.co/jxu124/TiO

Files changed (1):
  1. README.md +32 -29
README.md CHANGED
@@ -9,41 +9,41 @@ language:

  TiO is an Interactive Visual Grounding Model for Disambiguation. (WIP)

- ## Online / offline Demo
+ ## Online / Offline Demo
+
+ - [Colab Online Demo](https://colab.research.google.com/drive/195eDITKi6dahnVz8Cum91sNUCF_lFle8?usp=sharing) - Free T4 is available on Google Colab.
+ - Gradio Offline Demo:

  ```python
+ import os; os.system("pip3 install transformers gradio fire accelerate bitsandbytes > /dev/null")
  from transformers import AutoModel, AutoTokenizer, AutoImageProcessor
+ import torch

  model_id = "jxu124/TiO"
- model = AutoModel.from_pretrained(
-     model_id,
-     trust_remote_code=True,
-     torch_dtype=torch.float16,
-     device_map='cuda',
-     # load_in_4bit=True,
-     # bnb_4bit_compute_dtype=torch.float16,
- )
+ model = AutoModel.from_pretrained(model_id, trust_remote_code=True, torch_dtype=torch.float16).cuda()
  tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
  image_processor = AutoImageProcessor.from_pretrained(model_id)
- # setup gradio demo
- model.get_gradio_demo(tokenizer, image_processor).\
-     queue(max_size=20).launch(server_name="0.0.0.0", server_port=7860)
+
+ # ---- gradio demo ----
+ model.get_gradio_demo(tokenizer, image_processor).queue(max_size=20).launch(server_name="0.0.0.0", server_port=7860)
  ```

  ## Mini-Example
  ```python
+ import os; os.system("pip3 install transformers accelerate bitsandbytes gradio fire")
  from transformers import AutoModel, AutoTokenizer, AutoImageProcessor
+ import torch
+
+ model_id = "jxu124/TiO"
+ model = AutoModel.from_pretrained(model_id, trust_remote_code=True, torch_dtype=torch.float16).cuda()
+ tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
+ image_processor = AutoImageProcessor.from_pretrained(model_id)
+
+ # ---- mini example ----
  from PIL import Image
  from io import BytesIO
- import torch
  import requests

- # Load model, tokenizer, image_processor
- tokenizer = AutoTokenizer.from_pretrained("jxu124/TiO", use_fast=False)
- image_processor = AutoImageProcessor.from_pretrained("jxu124/TiO")
- model = AutoModel.from_pretrained("jxu124/TiO", trust_remote_code=True)
- model = model.to(torch.float16).cuda()  # It would be faster.
-
  # Prepare example
  image = Image.open(BytesIO(requests.get("http://images.cocodataset.org/val2014/COCO_val2014_000000429913.jpg").content))
  text = """\
@@ -64,25 +64,28 @@ print(tokenizer.batch_decode(gen, skip_special_tokens=True).replace("not yet.",

  Guesser(grounding):
  ```python
- text = """ #instruction: which region does the context describe? \n #context: \"\
+ text = """\
+ #instruction: which region does the context describe?
+ #context:
  human: look that man in white!
  agent: is he the one who just threw the ball?
- human: yes. I mean the pitcher.\"
- """
+ human: yes. I mean the pitcher."""
  ```

  Questioner(question generation):
  ```python
- text = """ #instruction: guess what I want? \n #context: \"\
- human: look that man in white! \"
- """
+ text = """\
+ #instruction: guess what I want?
+ #context:
+ human: look that man in white!"""
  ```

  Oracle(answering):
  ```python
- text = """ #instruction: answer the question based on the region. \n #context: \"\
+ text = """\
+ #instruction: answer the question based on the region.
+ #context:
  agent: look that man in white!
- human: is he the one who just threw the ball? \"
- #region: <bin_847> <bin_319> <bin_923> <bin_467>
- """
+ human: is he the one who just threw the ball?
+ #region: <bin_847> <bin_319> <bin_923> <bin_467>"""
  ```
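
The Mini-Example's actual generation step (README lines 50-63) sits between the two hunks, so it does not appear in this diff. For orientation only, a minimal sketch of what that step might look like, assuming an OFA-style interface in the repo's remote code; the `patch_images` keyword and the generate arguments are assumptions, not the repository's confirmed API:

```python
# Hypothetical sketch of the elided generation step; `patch_images` and the
# generate kwargs are assumptions modelled on OFA-style code, not the repo API.
inputs = tokenizer([text], return_tensors="pt").to(model.device)
pixel_values = image_processor([image], return_tensors="pt")["pixel_values"]
pixel_values = pixel_values.to(model.device, torch.float16)
gen = model.generate(**inputs, patch_images=pixel_values, max_new_tokens=64)
# batch_decode returns a list of strings, so index before using str methods
print(tokenizer.batch_decode(gen, skip_special_tokens=True)[0])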
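
After this commit, the three role prompts (Guesser, Questioner, Oracle) share one format: an `#instruction:` line, a `#context:` block of dialogue, and an optional `#region:` line. A small helper, illustrative only and not part of the repository, that assembles such prompts:

```python
def build_prompt(instruction, context, region=None):
    """Assemble a TiO-style prompt string (illustrative helper, not repo code)."""
    text = f"#instruction: {instruction}\n#context:\n" + "\n".join(context)
    if region is not None:
        text += f"\n#region: {region}"
    return text

# Reproduces the Guesser prompt from the diff above.
text = build_prompt(
    "which region does the context describe?",
    ["human: look that man in white!",
     "agent: is he the one who just threw the ball?",
     "human: yes. I mean the pitcher."],
)
```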
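
The `#region:` value encodes a box as quantized coordinate tokens. Assuming OFA-style uniform quantization of `x1 y1 x2 y2` into 1000 bins (an assumption; the exact scheme lives in the model's remote code), a sketch that maps the tokens back to pixel coordinates:

```python
import re

def bins_to_box(region, width, height, num_bins=1000):
    """Map '<bin_i>' tokens to pixel coords; assumes OFA-style x1 y1 x2 y2
    order and uniform bins over the image size (unverified assumption)."""
    x1, y1, x2, y2 = (int(b) / (num_bins - 1) for b in re.findall(r"<bin_(\d+)>", region))
    return (x1 * width, y1 * height, x2 * width, y2 * height)

# e.g. the Oracle example's region on a 640x480 image:
print(bins_to_box("<bin_847> <bin_319> <bin_923> <bin_467>", 640, 480))
```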