shengz committed on
Commit
f42acd8
·
1 Parent(s): 86ec378

Update ipynb.

Browse files
Files changed (1) hide show
  1. biomed_clip_example.ipynb +23 -137
biomed_clip_example.ipynb CHANGED
@@ -15,7 +15,7 @@
15
  },
16
  {
17
  "cell_type": "code",
18
- "execution_count": 1,
19
  "metadata": {
20
  "colab": {
21
  "base_uri": "https://localhost:8080/"
@@ -99,7 +99,7 @@
99
  },
100
  {
101
  "cell_type": "code",
102
- "execution_count": 2,
103
  "metadata": {
104
  "colab": {
105
  "base_uri": "https://localhost:8080/"
@@ -107,132 +107,12 @@
107
  "id": "V8Yv9g_8EQ1W",
108
  "outputId": "3ec24c9b-4c4f-4c17-8d76-6cfd74bb8bdf"
109
  },
110
- "outputs": [
111
- {
112
- "name": "stderr",
113
- "output_type": "stream",
114
- "text": [
115
- "/home/shezhan/anaconda3/envs/biomedclip/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
116
- " from .autonotebook import tqdm as notebook_tqdm\n"
117
- ]
118
- }
119
- ],
120
  "source": [
121
- "import open_clip\n",
122
  "\n",
123
- "model, preprocess_train, preprocess_val = open_clip.create_model_and_transforms('hf-hub:microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224')\n",
124
- "tokenizer = open_clip.get_tokenizer('hf-hub:microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224')"
125
- ]
126
- },
127
- {
128
- "attachments": {},
129
- "cell_type": "markdown",
130
- "metadata": {
131
- "id": "bk0hm1R7qqU_"
132
- },
133
- "source": [
134
- "# Download sample images"
135
- ]
136
- },
137
- {
138
- "cell_type": "code",
139
- "execution_count": 3,
140
- "metadata": {
141
- "colab": {
142
- "base_uri": "https://localhost:8080/",
143
- "height": 67,
144
- "referenced_widgets": [
145
- "692f8c386f9743a1a12f7d6c7959ca67",
146
- "3e0f188e73294f6ea4d1e28640cfdc22",
147
- "b754e18c5c49499d92db4803cfa426b7",
148
- "6743cbc5ca2c47e7be565e0d6cd933c9",
149
- "02aa2c49f2a94a7eb48794ed783c93e8",
150
- "ad84c0ed082d4ab7abf2815fc1910efa",
151
- "87a18840cc2c45ac824e8fe3d83d5150",
152
- "0b3b4fc0e99a47d0a494aee20166337f",
153
- "2de24c12eebd4054a3e6163fb6951986",
154
- "1c9af9a39e594c689590d09ae71baeb3",
155
- "182cc15b918a45d081543a6b3f182a07"
156
- ]
157
- },
158
- "id": "qqafKW1kqgc4",
159
- "outputId": "34c29f78-32c5-4a6f-914e-30e8a07840a6"
160
- },
161
- "outputs": [
162
- {
163
- "name": "stderr",
164
- "output_type": "stream",
165
- "text": [
166
- "README.md: 100%|██████████| 4.13k/4.13k [00:00<00:00, 7.84MB/s]\n",
167
- "biomed-vlp-eval.svg: 100%|██████████| 63.4k/63.4k [00:00<00:00, 9.46MB/s]\n",
168
- "\n",
169
- "(…)e_data/adenocarcinoma_histopathology.jpg: 100%|██████████| 26.9k/26.9k [00:00<00:00, 8.89MB/s]\n",
170
- "\n",
171
- "\n",
172
- "(…)tion_example_data/IHC_histopathology.jpg: 100%|██████████| 181k/181k [00:00<00:00, 11.9MB/s]\n",
173
- "(…)_example_data/H_and_E_histopathology.jpg: 100%|██████████| 177k/177k [00:00<00:00, 5.38MB/s]\n",
174
- "\n",
175
- "\n",
176
- "biomed_clip_example.ipynb: 100%|██████████| 2.88M/2.88M [00:00<00:00, 26.7MB/s]\n",
177
- "LICENSE.md: 100%|██████████| 1.07k/1.07k [00:00<00:00, 9.03MB/s]\n",
178
- ".gitattributes: 100%|██████████| 1.48k/1.48k [00:00<00:00, 8.57MB/s]\n",
179
- "(…)assification_example_data/bone_X-ray.jpg: 100%|██████████| 7.44k/7.44k [00:00<00:00, 13.3MB/s]\n",
180
- "(…)lassification_example_data/brain_MRI.jpg: 100%|██████████| 128k/128k [00:00<00:00, 27.3MB/s]\n",
181
- "(…)cation_example_data/covid_line_chart.png: 100%|██████████| 6.30k/6.30k [00:00<00:00, 10.7MB/s]\n",
182
- "\n",
183
- "(…)lassification_example_data/pie_chart.png: 100%|██████████| 371k/371k [00:00<00:00, 29.6MB/s]\n",
184
- "\n",
185
- "special_tokens_map.json: 100%|██████████| 125/125 [00:00<00:00, 941kB/s]\n",
186
- "\n",
187
- "(…)ssification_example_data/chest_X-ray.jpg: 100%|██████████| 906k/906k [00:00<00:00, 4.06MB/s]\n",
188
- "vocab.txt: 100%|██████████| 225k/225k [00:00<00:00, 84.4MB/s]it/s]\n",
189
- "(…)amous_cell_carcinoma_histopathology.jpeg: 100%|██████████| 17.2k/17.2k [00:00<00:00, 24.9MB/s]\n",
190
- "tokenizer_config.json: 100%|██████████| 394/394 [00:00<00:00, 1.78MB/s]\n",
191
- "\n",
192
- "tokenizer.json: 100%|██████████| 679k/679k [00:00<00:00, 3.06MB/s]\n",
193
- "Fetching 20 files: 100%|██████████| 20/20 [00:01<00:00, 19.93it/s]\n"
194
- ]
195
- },
196
- {
197
- "data": {
198
- "text/plain": [
199
- "'/home/shezhan/repos/biomedclip/biomed-clip-share'"
200
- ]
201
- },
202
- "execution_count": 3,
203
- "metadata": {},
204
- "output_type": "execute_result"
205
- }
206
- ],
207
- "source": [
208
- "from huggingface_hub import snapshot_download\n",
209
- "snapshot_download(\"microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224\", local_dir=\"biomed-clip-share\")"
210
- ]
211
- },
212
- {
213
- "cell_type": "code",
214
- "execution_count": 4,
215
- "metadata": {
216
- "colab": {
217
- "base_uri": "https://localhost:8080/"
218
- },
219
- "id": "4WOxBdKr0e_m",
220
- "outputId": "2a05beae-6f5f-4c3c-ef59-b23210b6e1b5"
221
- },
222
- "outputs": [
223
- {
224
- "name": "stdout",
225
- "output_type": "stream",
226
- "text": [
227
- "biomed_clip_example.ipynb open_clip_config.json\ttokenizer_config.json\n",
228
- "biomed-vlp-eval.svg\t open_clip_pytorch_model.bin\ttokenizer.json\n",
229
- "example_data\t\t README.md\t\t\tvocab.txt\n",
230
- "LICENSE.md\t\t special_tokens_map.json\n"
231
- ]
232
- }
233
- ],
234
- "source": [
235
- "!ls biomed-clip-share"
236
  ]
237
  },
238
  {
@@ -247,7 +127,7 @@
247
  },
248
  {
249
  "cell_type": "code",
250
- "execution_count": 5,
251
  "metadata": {
252
  "colab": {
253
  "base_uri": "https://localhost:8080/"
@@ -372,14 +252,10 @@
372
  }
373
  ],
374
  "source": [
375
- "import glob\n",
376
- "from collections import OrderedDict\n",
377
- "\n",
378
  "import torch\n",
 
379
  "from PIL import Image\n",
380
- "import open_clip\n",
381
  "\n",
382
- "dataset_path = 'biomed-clip-share/example_data/biomed_image_classification_example_data'\n",
383
  "template = 'this is a photo of '\n",
384
  "labels = [\n",
385
  " 'adenocarcinoma histopathology',\n",
@@ -393,15 +269,25 @@
393
  " 'hematoxylin and eosin histopathology'\n",
394
  "]\n",
395
  "\n",
396
- "test_imgs = glob.glob(dataset_path + '/*')\n",
397
- "\n",
 
 
 
 
 
 
 
 
 
 
398
  "device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')\n",
399
  "model.to(device)\n",
400
  "model.eval()\n",
401
  "\n",
402
  "context_length = 256\n",
403
  "\n",
404
- "images = torch.stack([preprocess_val(Image.open(img)) for img in test_imgs]).to(device)\n",
405
  "texts = tokenizer([template + l for l in labels], context_length=context_length).to(device)\n",
406
  "with torch.no_grad():\n",
407
  " image_features, text_features, logit_scale = model(images, texts)\n",
@@ -547,7 +433,7 @@
547
  },
548
  {
549
  "cell_type": "code",
550
- "execution_count": 6,
551
  "metadata": {
552
  "colab": {
553
  "base_uri": "https://localhost:8080/",
@@ -576,7 +462,7 @@
576
  " fig, axes = plt.subplots(nrows=num_images, ncols=1, figsize=(5, 5 * num_images))\n",
577
  "\n",
578
  " for i, (img_path, metadata) in enumerate(zip(images, metadata)):\n",
579
- " img = Image.open(img_path)\n",
580
  " ax = axes[i]\n",
581
  " ax.imshow(img)\n",
582
  " ax.axis('off')\n",
 
15
  },
16
  {
17
  "cell_type": "code",
18
+ "execution_count": 19,
19
  "metadata": {
20
  "colab": {
21
  "base_uri": "https://localhost:8080/"
 
99
  },
100
  {
101
  "cell_type": "code",
102
+ "execution_count": 20,
103
  "metadata": {
104
  "colab": {
105
  "base_uri": "https://localhost:8080/"
 
107
  "id": "V8Yv9g_8EQ1W",
108
  "outputId": "3ec24c9b-4c4f-4c17-8d76-6cfd74bb8bdf"
109
  },
110
+ "outputs": [],
 
 
 
 
 
 
 
 
 
111
  "source": [
112
+ "from open_clip import create_model_from_pretrained, get_tokenizer # works on open-clip-torch>=2.23.0, timm>=0.9.8\n",
113
  "\n",
114
+ "model, preprocess = create_model_from_pretrained('hf-hub:microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224')\n",
115
+ "tokenizer = get_tokenizer('hf-hub:microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224')"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  ]
117
  },
118
  {
 
127
  },
128
  {
129
  "cell_type": "code",
130
+ "execution_count": 21,
131
  "metadata": {
132
  "colab": {
133
  "base_uri": "https://localhost:8080/"
 
252
  }
253
  ],
254
  "source": [
 
 
 
255
  "import torch\n",
256
+ "from urllib.request import urlopen\n",
257
  "from PIL import Image\n",
 
258
  "\n",
 
259
  "template = 'this is a photo of '\n",
260
  "labels = [\n",
261
  " 'adenocarcinoma histopathology',\n",
 
269
  " 'hematoxylin and eosin histopathology'\n",
270
  "]\n",
271
  "\n",
272
+ "dataset_url = 'https://huggingface.co/microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224/resolve/main/example_data/biomed_image_classification_example_data/'\n",
273
+ "test_imgs = [\n",
274
+ " 'squamous_cell_carcinoma_histopathology.jpeg',\n",
275
+ " 'H_and_E_histopathology.jpg',\n",
276
+ " 'bone_X-ray.jpg',\n",
277
+ " 'adenocarcinoma_histopathology.jpg',\n",
278
+ " 'covid_line_chart.png',\n",
279
+ " 'IHC_histopathology.jpg',\n",
280
+ " 'chest_X-ray.jpg',\n",
281
+ " 'brain_MRI.jpg',\n",
282
+ " 'pie_chart.png'\n",
283
+ "]\n",
284
  "device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')\n",
285
  "model.to(device)\n",
286
  "model.eval()\n",
287
  "\n",
288
  "context_length = 256\n",
289
  "\n",
290
+ "images = torch.stack([preprocess(Image.open(urlopen(dataset_url + img))) for img in test_imgs]).to(device)\n",
291
  "texts = tokenizer([template + l for l in labels], context_length=context_length).to(device)\n",
292
  "with torch.no_grad():\n",
293
  " image_features, text_features, logit_scale = model(images, texts)\n",
 
433
  },
434
  {
435
  "cell_type": "code",
436
+ "execution_count": 22,
437
  "metadata": {
438
  "colab": {
439
  "base_uri": "https://localhost:8080/",
 
462
  " fig, axes = plt.subplots(nrows=num_images, ncols=1, figsize=(5, 5 * num_images))\n",
463
  "\n",
464
  " for i, (img_path, metadata) in enumerate(zip(images, metadata)):\n",
465
+ " img = Image.open(urlopen(dataset_url + img_path))\n",
466
  " ax = axes[i]\n",
467
  " ax.imshow(img)\n",
468
  " ax.axis('off')\n",