zhibinlan committed on
Commit
41345ad
·
verified ·
1 Parent(s): a3252c3

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +6 -12
README.md CHANGED
@@ -82,8 +82,7 @@ conv_template = "qwen_1_5" # Make sure you use correct chat template for differ
82
  question = DEFAULT_IMAGE_TOKEN + " Represent the given image with the following question: What is in the image"
83
  conv = copy.deepcopy(conv_templates[conv_template])
84
  conv.append_message(conv.roles[0], question)
85
- conv.append_message(conv.roles[1], "
86
- ")
87
  prompt_question = conv.get_prompt()
88
  input_ids = tokenizer_image_token(prompt_question, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0).to(device)
89
  attention_mask=input_ids.ne(tokenizer.pad_token_id)
@@ -93,8 +92,7 @@ query_embed = model.encode_multimodal_embeddings(input_ids, attention_mask=atten
93
  target_string = "A cat and a dog"
94
  conv = copy.deepcopy(conv_templates[conv_template])
95
  conv.append_message(conv.roles[0], target_string)
96
- conv.append_message(conv.roles[1], "
97
- ")
98
  target_string = conv.get_prompt()
99
  target_input_ids = tokenizer(target_string, return_tensors="pt").input_ids.to(device)
100
  attention_mask=target_input_ids.ne(tokenizer.pad_token_id)
@@ -106,8 +104,7 @@ print("A cat and a dog similarity score: ", query_embed @ target_embed.T)
106
  neg_string = "A cat and a tiger"
107
  conv = copy.deepcopy(conv_templates[conv_template])
108
  conv.append_message(conv.roles[0], neg_string)
109
- conv.append_message(conv.roles[1], "
110
- ")
111
  neg_string = conv.get_prompt()
112
  neg_input_ids = tokenizer(neg_string, return_tensors="pt").input_ids.to(device)
113
  attention_mask=neg_input_ids.ne(tokenizer.pad_token_id)
@@ -120,8 +117,7 @@ print("A cat and a tiger similarity score: ", query_embed @ neg_embed.T)
120
  pos_string = "Find me an everyday image that matches the given caption: A cat and a dog."
121
  conv = copy.deepcopy(conv_templates[conv_template])
122
  conv.append_message(conv.roles[0], pos_string)
123
- conv.append_message(conv.roles[1], "
124
- ")
125
  pos_string = conv.get_prompt()
126
  pos_input_ids = tokenizer(pos_string, return_tensors="pt").input_ids.to(device)
127
  attention_mask=pos_input_ids.ne(tokenizer.pad_token_id)
@@ -130,8 +126,7 @@ pos_query_embed = model.encode_multimodal_embeddings(pos_input_ids, attention_ma
130
  target = DEFAULT_IMAGE_TOKEN + " Represent the given image."
131
  conv = copy.deepcopy(conv_templates[conv_template])
132
  conv.append_message(conv.roles[0], target)
133
- conv.append_message(conv.roles[1], "
134
- ")
135
  prompt_target = conv.get_prompt()
136
  target_input_ids = tokenizer_image_token(prompt_target, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0).to(device)
137
  attention_mask=target_input_ids.ne(tokenizer.pad_token_id)
@@ -144,8 +139,7 @@ print("A cat and a dog image similarity score: ", pos_query_embed @ target_embed
144
  neg_string = "Find me an everyday image that matches the given caption: A cat and a tiger."
145
  conv = copy.deepcopy(conv_templates[conv_template])
146
  conv.append_message(conv.roles[0], neg_string)
147
- conv.append_message(conv.roles[1], "
148
- ")
149
  neg_string = conv.get_prompt()
150
  neg_input_ids = tokenizer(neg_string, return_tensors="pt").input_ids.to(device)
151
  attention_mask=neg_input_ids.ne(tokenizer.pad_token_id)
 
82
  question = DEFAULT_IMAGE_TOKEN + " Represent the given image with the following question: What is in the image"
83
  conv = copy.deepcopy(conv_templates[conv_template])
84
  conv.append_message(conv.roles[0], question)
85
+ conv.append_message(conv.roles[1], "\n")
 
86
  prompt_question = conv.get_prompt()
87
  input_ids = tokenizer_image_token(prompt_question, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0).to(device)
88
  attention_mask=input_ids.ne(tokenizer.pad_token_id)
 
92
  target_string = "A cat and a dog"
93
  conv = copy.deepcopy(conv_templates[conv_template])
94
  conv.append_message(conv.roles[0], target_string)
95
+ conv.append_message(conv.roles[1], "\n")
 
96
  target_string = conv.get_prompt()
97
  target_input_ids = tokenizer(target_string, return_tensors="pt").input_ids.to(device)
98
  attention_mask=target_input_ids.ne(tokenizer.pad_token_id)
 
104
  neg_string = "A cat and a tiger"
105
  conv = copy.deepcopy(conv_templates[conv_template])
106
  conv.append_message(conv.roles[0], neg_string)
107
+ conv.append_message(conv.roles[1], "\n")
 
108
  neg_string = conv.get_prompt()
109
  neg_input_ids = tokenizer(neg_string, return_tensors="pt").input_ids.to(device)
110
  attention_mask=neg_input_ids.ne(tokenizer.pad_token_id)
 
117
  pos_string = "Find me an everyday image that matches the given caption: A cat and a dog."
118
  conv = copy.deepcopy(conv_templates[conv_template])
119
  conv.append_message(conv.roles[0], pos_string)
120
+ conv.append_message(conv.roles[1], "\n")
 
121
  pos_string = conv.get_prompt()
122
  pos_input_ids = tokenizer(pos_string, return_tensors="pt").input_ids.to(device)
123
  attention_mask=pos_input_ids.ne(tokenizer.pad_token_id)
 
126
  target = DEFAULT_IMAGE_TOKEN + " Represent the given image."
127
  conv = copy.deepcopy(conv_templates[conv_template])
128
  conv.append_message(conv.roles[0], target)
129
+ conv.append_message(conv.roles[1], "\n")
 
130
  prompt_target = conv.get_prompt()
131
  target_input_ids = tokenizer_image_token(prompt_target, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0).to(device)
132
  attention_mask=target_input_ids.ne(tokenizer.pad_token_id)
 
139
  neg_string = "Find me an everyday image that matches the given caption: A cat and a tiger."
140
  conv = copy.deepcopy(conv_templates[conv_template])
141
  conv.append_message(conv.roles[0], neg_string)
142
+ conv.append_message(conv.roles[1], "\n")
 
143
  neg_string = conv.get_prompt()
144
  neg_input_ids = tokenizer(neg_string, return_tensors="pt").input_ids.to(device)
145
  attention_mask=neg_input_ids.ne(tokenizer.pad_token_id)