Update ipynb.
Browse files- biomed_clip_example.ipynb +23 -137
biomed_clip_example.ipynb
CHANGED
@@ -15,7 +15,7 @@
|
|
15 |
},
|
16 |
{
|
17 |
"cell_type": "code",
|
18 |
-
"execution_count":
|
19 |
"metadata": {
|
20 |
"colab": {
|
21 |
"base_uri": "https://localhost:8080/"
|
@@ -99,7 +99,7 @@
|
|
99 |
},
|
100 |
{
|
101 |
"cell_type": "code",
|
102 |
-
"execution_count":
|
103 |
"metadata": {
|
104 |
"colab": {
|
105 |
"base_uri": "https://localhost:8080/"
|
@@ -107,132 +107,12 @@
|
|
107 |
"id": "V8Yv9g_8EQ1W",
|
108 |
"outputId": "3ec24c9b-4c4f-4c17-8d76-6cfd74bb8bdf"
|
109 |
},
|
110 |
-
"outputs": [
|
111 |
-
{
|
112 |
-
"name": "stderr",
|
113 |
-
"output_type": "stream",
|
114 |
-
"text": [
|
115 |
-
"/home/shezhan/anaconda3/envs/biomedclip/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
116 |
-
" from .autonotebook import tqdm as notebook_tqdm\n"
|
117 |
-
]
|
118 |
-
}
|
119 |
-
],
|
120 |
"source": [
|
121 |
-
"import
|
122 |
"\n",
|
123 |
-
"model,
|
124 |
-
"tokenizer =
|
125 |
-
]
|
126 |
-
},
|
127 |
-
{
|
128 |
-
"attachments": {},
|
129 |
-
"cell_type": "markdown",
|
130 |
-
"metadata": {
|
131 |
-
"id": "bk0hm1R7qqU_"
|
132 |
-
},
|
133 |
-
"source": [
|
134 |
-
"# Download sample images"
|
135 |
-
]
|
136 |
-
},
|
137 |
-
{
|
138 |
-
"cell_type": "code",
|
139 |
-
"execution_count": 3,
|
140 |
-
"metadata": {
|
141 |
-
"colab": {
|
142 |
-
"base_uri": "https://localhost:8080/",
|
143 |
-
"height": 67,
|
144 |
-
"referenced_widgets": [
|
145 |
-
"692f8c386f9743a1a12f7d6c7959ca67",
|
146 |
-
"3e0f188e73294f6ea4d1e28640cfdc22",
|
147 |
-
"b754e18c5c49499d92db4803cfa426b7",
|
148 |
-
"6743cbc5ca2c47e7be565e0d6cd933c9",
|
149 |
-
"02aa2c49f2a94a7eb48794ed783c93e8",
|
150 |
-
"ad84c0ed082d4ab7abf2815fc1910efa",
|
151 |
-
"87a18840cc2c45ac824e8fe3d83d5150",
|
152 |
-
"0b3b4fc0e99a47d0a494aee20166337f",
|
153 |
-
"2de24c12eebd4054a3e6163fb6951986",
|
154 |
-
"1c9af9a39e594c689590d09ae71baeb3",
|
155 |
-
"182cc15b918a45d081543a6b3f182a07"
|
156 |
-
]
|
157 |
-
},
|
158 |
-
"id": "qqafKW1kqgc4",
|
159 |
-
"outputId": "34c29f78-32c5-4a6f-914e-30e8a07840a6"
|
160 |
-
},
|
161 |
-
"outputs": [
|
162 |
-
{
|
163 |
-
"name": "stderr",
|
164 |
-
"output_type": "stream",
|
165 |
-
"text": [
|
166 |
-
"README.md: 100%|██████████| 4.13k/4.13k [00:00<00:00, 7.84MB/s]\n",
|
167 |
-
"biomed-vlp-eval.svg: 100%|██████████| 63.4k/63.4k [00:00<00:00, 9.46MB/s]\n",
|
168 |
-
"\n",
|
169 |
-
"(…)e_data/adenocarcinoma_histopathology.jpg: 100%|██████████| 26.9k/26.9k [00:00<00:00, 8.89MB/s]\n",
|
170 |
-
"\n",
|
171 |
-
"\n",
|
172 |
-
"(…)tion_example_data/IHC_histopathology.jpg: 100%|██████████| 181k/181k [00:00<00:00, 11.9MB/s]\n",
|
173 |
-
"(…)_example_data/H_and_E_histopathology.jpg: 100%|██████████| 177k/177k [00:00<00:00, 5.38MB/s]\n",
|
174 |
-
"\n",
|
175 |
-
"\n",
|
176 |
-
"biomed_clip_example.ipynb: 100%|██████████| 2.88M/2.88M [00:00<00:00, 26.7MB/s]\n",
|
177 |
-
"LICENSE.md: 100%|██████████| 1.07k/1.07k [00:00<00:00, 9.03MB/s]\n",
|
178 |
-
".gitattributes: 100%|██████████| 1.48k/1.48k [00:00<00:00, 8.57MB/s]\n",
|
179 |
-
"(…)assification_example_data/bone_X-ray.jpg: 100%|██████████| 7.44k/7.44k [00:00<00:00, 13.3MB/s]\n",
|
180 |
-
"(…)lassification_example_data/brain_MRI.jpg: 100%|██████████| 128k/128k [00:00<00:00, 27.3MB/s]\n",
|
181 |
-
"(…)cation_example_data/covid_line_chart.png: 100%|██████████| 6.30k/6.30k [00:00<00:00, 10.7MB/s]\n",
|
182 |
-
"\n",
|
183 |
-
"(…)lassification_example_data/pie_chart.png: 100%|██████████| 371k/371k [00:00<00:00, 29.6MB/s]\n",
|
184 |
-
"\n",
|
185 |
-
"special_tokens_map.json: 100%|██████████| 125/125 [00:00<00:00, 941kB/s]\n",
|
186 |
-
"\n",
|
187 |
-
"(…)ssification_example_data/chest_X-ray.jpg: 100%|██████████| 906k/906k [00:00<00:00, 4.06MB/s]\n",
|
188 |
-
"vocab.txt: 100%|██████████| 225k/225k [00:00<00:00, 84.4MB/s]it/s]\n",
|
189 |
-
"(…)amous_cell_carcinoma_histopathology.jpeg: 100%|██████████| 17.2k/17.2k [00:00<00:00, 24.9MB/s]\n",
|
190 |
-
"tokenizer_config.json: 100%|██████████| 394/394 [00:00<00:00, 1.78MB/s]\n",
|
191 |
-
"\n",
|
192 |
-
"tokenizer.json: 100%|██████████| 679k/679k [00:00<00:00, 3.06MB/s]\n",
|
193 |
-
"Fetching 20 files: 100%|██████████| 20/20 [00:01<00:00, 19.93it/s]\n"
|
194 |
-
]
|
195 |
-
},
|
196 |
-
{
|
197 |
-
"data": {
|
198 |
-
"text/plain": [
|
199 |
-
"'/home/shezhan/repos/biomedclip/biomed-clip-share'"
|
200 |
-
]
|
201 |
-
},
|
202 |
-
"execution_count": 3,
|
203 |
-
"metadata": {},
|
204 |
-
"output_type": "execute_result"
|
205 |
-
}
|
206 |
-
],
|
207 |
-
"source": [
|
208 |
-
"from huggingface_hub import snapshot_download\n",
|
209 |
-
"snapshot_download(\"microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224\", local_dir=\"biomed-clip-share\")"
|
210 |
-
]
|
211 |
-
},
|
212 |
-
{
|
213 |
-
"cell_type": "code",
|
214 |
-
"execution_count": 4,
|
215 |
-
"metadata": {
|
216 |
-
"colab": {
|
217 |
-
"base_uri": "https://localhost:8080/"
|
218 |
-
},
|
219 |
-
"id": "4WOxBdKr0e_m",
|
220 |
-
"outputId": "2a05beae-6f5f-4c3c-ef59-b23210b6e1b5"
|
221 |
-
},
|
222 |
-
"outputs": [
|
223 |
-
{
|
224 |
-
"name": "stdout",
|
225 |
-
"output_type": "stream",
|
226 |
-
"text": [
|
227 |
-
"biomed_clip_example.ipynb open_clip_config.json\ttokenizer_config.json\n",
|
228 |
-
"biomed-vlp-eval.svg\t open_clip_pytorch_model.bin\ttokenizer.json\n",
|
229 |
-
"example_data\t\t README.md\t\t\tvocab.txt\n",
|
230 |
-
"LICENSE.md\t\t special_tokens_map.json\n"
|
231 |
-
]
|
232 |
-
}
|
233 |
-
],
|
234 |
-
"source": [
|
235 |
-
"!ls biomed-clip-share"
|
236 |
]
|
237 |
},
|
238 |
{
|
@@ -247,7 +127,7 @@
|
|
247 |
},
|
248 |
{
|
249 |
"cell_type": "code",
|
250 |
-
"execution_count":
|
251 |
"metadata": {
|
252 |
"colab": {
|
253 |
"base_uri": "https://localhost:8080/"
|
@@ -372,14 +252,10 @@
|
|
372 |
}
|
373 |
],
|
374 |
"source": [
|
375 |
-
"import glob\n",
|
376 |
-
"from collections import OrderedDict\n",
|
377 |
-
"\n",
|
378 |
"import torch\n",
|
|
|
379 |
"from PIL import Image\n",
|
380 |
-
"import open_clip\n",
|
381 |
"\n",
|
382 |
-
"dataset_path = 'biomed-clip-share/example_data/biomed_image_classification_example_data'\n",
|
383 |
"template = 'this is a photo of '\n",
|
384 |
"labels = [\n",
|
385 |
" 'adenocarcinoma histopathology',\n",
|
@@ -393,15 +269,25 @@
|
|
393 |
" 'hematoxylin and eosin histopathology'\n",
|
394 |
"]\n",
|
395 |
"\n",
|
396 |
-
"
|
397 |
-
"\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
398 |
"device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')\n",
|
399 |
"model.to(device)\n",
|
400 |
"model.eval()\n",
|
401 |
"\n",
|
402 |
"context_length = 256\n",
|
403 |
"\n",
|
404 |
-
"images = torch.stack([
|
405 |
"texts = tokenizer([template + l for l in labels], context_length=context_length).to(device)\n",
|
406 |
"with torch.no_grad():\n",
|
407 |
" image_features, text_features, logit_scale = model(images, texts)\n",
|
@@ -547,7 +433,7 @@
|
|
547 |
},
|
548 |
{
|
549 |
"cell_type": "code",
|
550 |
-
"execution_count":
|
551 |
"metadata": {
|
552 |
"colab": {
|
553 |
"base_uri": "https://localhost:8080/",
|
@@ -576,7 +462,7 @@
|
|
576 |
" fig, axes = plt.subplots(nrows=num_images, ncols=1, figsize=(5, 5 * num_images))\n",
|
577 |
"\n",
|
578 |
" for i, (img_path, metadata) in enumerate(zip(images, metadata)):\n",
|
579 |
-
" img = Image.open(img_path)\n",
|
580 |
" ax = axes[i]\n",
|
581 |
" ax.imshow(img)\n",
|
582 |
" ax.axis('off')\n",
|
|
|
15 |
},
|
16 |
{
|
17 |
"cell_type": "code",
|
18 |
+
"execution_count": 19,
|
19 |
"metadata": {
|
20 |
"colab": {
|
21 |
"base_uri": "https://localhost:8080/"
|
|
|
99 |
},
|
100 |
{
|
101 |
"cell_type": "code",
|
102 |
+
"execution_count": 20,
|
103 |
"metadata": {
|
104 |
"colab": {
|
105 |
"base_uri": "https://localhost:8080/"
|
|
|
107 |
"id": "V8Yv9g_8EQ1W",
|
108 |
"outputId": "3ec24c9b-4c4f-4c17-8d76-6cfd74bb8bdf"
|
109 |
},
|
110 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
"source": [
|
112 |
+
"from open_clip import create_model_from_pretrained, get_tokenizer # works on open-clip-torch>=2.23.0, timm>=0.9.8\n",
|
113 |
"\n",
|
114 |
+
"model, preprocess = create_model_from_pretrained('hf-hub:microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224')\n",
|
115 |
+
"tokenizer = get_tokenizer('hf-hub:microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224')"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
]
|
117 |
},
|
118 |
{
|
|
|
127 |
},
|
128 |
{
|
129 |
"cell_type": "code",
|
130 |
+
"execution_count": 21,
|
131 |
"metadata": {
|
132 |
"colab": {
|
133 |
"base_uri": "https://localhost:8080/"
|
|
|
252 |
}
|
253 |
],
|
254 |
"source": [
|
|
|
|
|
|
|
255 |
"import torch\n",
|
256 |
+
"from urllib.request import urlopen\n",
|
257 |
"from PIL import Image\n",
|
|
|
258 |
"\n",
|
|
|
259 |
"template = 'this is a photo of '\n",
|
260 |
"labels = [\n",
|
261 |
" 'adenocarcinoma histopathology',\n",
|
|
|
269 |
" 'hematoxylin and eosin histopathology'\n",
|
270 |
"]\n",
|
271 |
"\n",
|
272 |
+
"dataset_url = 'https://huggingface.co/microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224/resolve/main/example_data/biomed_image_classification_example_data/'\n",
|
273 |
+
"test_imgs = [\n",
|
274 |
+
" 'squamous_cell_carcinoma_histopathology.jpeg',\n",
|
275 |
+
" 'H_and_E_histopathology.jpg',\n",
|
276 |
+
" 'bone_X-ray.jpg',\n",
|
277 |
+
" 'adenocarcinoma_histopathology.jpg',\n",
|
278 |
+
" 'covid_line_chart.png',\n",
|
279 |
+
" 'IHC_histopathology.jpg',\n",
|
280 |
+
" 'chest_X-ray.jpg',\n",
|
281 |
+
" 'brain_MRI.jpg',\n",
|
282 |
+
" 'pie_chart.png'\n",
|
283 |
+
"]\n",
|
284 |
"device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')\n",
|
285 |
"model.to(device)\n",
|
286 |
"model.eval()\n",
|
287 |
"\n",
|
288 |
"context_length = 256\n",
|
289 |
"\n",
|
290 |
+
"images = torch.stack([preprocess(Image.open(urlopen(dataset_url + img))) for img in test_imgs]).to(device)\n",
|
291 |
"texts = tokenizer([template + l for l in labels], context_length=context_length).to(device)\n",
|
292 |
"with torch.no_grad():\n",
|
293 |
" image_features, text_features, logit_scale = model(images, texts)\n",
|
|
|
433 |
},
|
434 |
{
|
435 |
"cell_type": "code",
|
436 |
+
"execution_count": 22,
|
437 |
"metadata": {
|
438 |
"colab": {
|
439 |
"base_uri": "https://localhost:8080/",
|
|
|
462 |
" fig, axes = plt.subplots(nrows=num_images, ncols=1, figsize=(5, 5 * num_images))\n",
|
463 |
"\n",
|
464 |
" for i, (img_path, metadata) in enumerate(zip(images, metadata)):\n",
|
465 |
+
" img = Image.open(urlopen(dataset_url + img_path))\n",
|
466 |
" ax = axes[i]\n",
|
467 |
" ax.imshow(img)\n",
|
468 |
" ax.axis('off')\n",
|