Update README.md
README.md CHANGED

```diff
@@ -82,8 +82,7 @@ conv_template = "qwen_1_5" # Make sure you use correct chat template for differ
 question = DEFAULT_IMAGE_TOKEN + " Represent the given image with the following question: What is in the image"
 conv = copy.deepcopy(conv_templates[conv_template])
 conv.append_message(conv.roles[0], question)
-conv.append_message(conv.roles[1], "
-")
+conv.append_message(conv.roles[1], "\n")
 prompt_question = conv.get_prompt()
 input_ids = tokenizer_image_token(prompt_question, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0).to(device)
 attention_mask=input_ids.ne(tokenizer.pad_token_id)
@@ -93,8 +92,7 @@ query_embed = model.encode_multimodal_embeddings(input_ids, attention_mask=atten
 target_string = "A cat and a dog"
 conv = copy.deepcopy(conv_templates[conv_template])
 conv.append_message(conv.roles[0], target_string)
-conv.append_message(conv.roles[1], "
-")
+conv.append_message(conv.roles[1], "\n")
 target_string = conv.get_prompt()
 target_input_ids = tokenizer(target_string, return_tensors="pt").input_ids.to(device)
 attention_mask=target_input_ids.ne(tokenizer.pad_token_id)
@@ -106,8 +104,7 @@ print("A cat and a dog similarity score: ", query_embed @ target_embed.T)
 neg_string = "A cat and a tiger"
 conv = copy.deepcopy(conv_templates[conv_template])
 conv.append_message(conv.roles[0], neg_string)
-conv.append_message(conv.roles[1], "
-")
+conv.append_message(conv.roles[1], "\n")
 neg_string = conv.get_prompt()
 neg_input_ids = tokenizer(neg_string, return_tensors="pt").input_ids.to(device)
 attention_mask=neg_input_ids.ne(tokenizer.pad_token_id)
@@ -120,8 +117,7 @@ print("A cat and a tiger similarity score: ", query_embed @ neg_embed.T)
 pos_string = "Find me an everyday image that matches the given caption: A cat and a dog."
 conv = copy.deepcopy(conv_templates[conv_template])
 conv.append_message(conv.roles[0], pos_string)
-conv.append_message(conv.roles[1], "
-")
+conv.append_message(conv.roles[1], "\n")
 pos_string = conv.get_prompt()
 pos_input_ids = tokenizer(pos_string, return_tensors="pt").input_ids.to(device)
 attention_mask=pos_input_ids.ne(tokenizer.pad_token_id)
@@ -130,8 +126,7 @@ pos_query_embed = model.encode_multimodal_embeddings(pos_input_ids, attention_ma
 target = DEFAULT_IMAGE_TOKEN + " Represent the given image."
 conv = copy.deepcopy(conv_templates[conv_template])
 conv.append_message(conv.roles[0], target)
-conv.append_message(conv.roles[1], "
-")
+conv.append_message(conv.roles[1], "\n")
 prompt_target = conv.get_prompt()
 target_input_ids = tokenizer_image_token(prompt_target, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0).to(device)
 attention_mask=target_input_ids.ne(tokenizer.pad_token_id)
@@ -144,8 +139,7 @@ print("A cat and a dog image similarity score: ", pos_query_embed @ target_embed
 neg_string = "Find me an everyday image that matches the given caption: A cat and a tiger."
 conv = copy.deepcopy(conv_templates[conv_template])
 conv.append_message(conv.roles[0], neg_string)
-conv.append_message(conv.roles[1], "
-")
+conv.append_message(conv.roles[1], "\n")
 neg_string = conv.get_prompt()
 neg_input_ids = tokenizer(neg_string, return_tensors="pt").input_ids.to(device)
 attention_mask=neg_input_ids.ne(tokenizer.pad_token_id)
```
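Every hunk makes the same one-line fix: the assistant turn appended before `get_prompt()` is now the escaped string `"\n"` rather than a string literal containing a raw line break, which rendered across two lines in the README and broke copy-paste. Since the prompt-building pattern repeats in each snippet, it can be factored into a small helper; the sketch below is illustrative only (`build_prompt` is not part of the README) and assumes the README's `conv_templates`, `conv_template`, `tokenizer`, and `device` are already in scope:

```python
import copy

def build_prompt(text):
    # Hypothetical helper capturing the pattern repeated in every hunk:
    # one user turn carrying the text, one assistant turn holding the
    # escaped newline "\n" (the string this commit fixes), then render.
    conv = copy.deepcopy(conv_templates[conv_template])
    conv.append_message(conv.roles[0], text)
    conv.append_message(conv.roles[1], "\n")
    return conv.get_prompt()

# Text-only prompts go straight through the tokenizer, as in the README:
target_string = build_prompt("A cat and a dog")
target_input_ids = tokenizer(target_string, return_tensors="pt").input_ids.to(device)
attention_mask = target_input_ids.ne(tokenizer.pad_token_id)
```

Prompts that begin with `DEFAULT_IMAGE_TOKEN` instead go through `tokenizer_image_token`, and the embeddings returned by `model.encode_multimodal_embeddings` are compared with plain dot products (e.g. `query_embed @ target_embed.T`), exactly as in the context lines above.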