Upload MedVLThinker-3B-RL_m23k-RL_PMC model weights
README.md CHANGED
@@ -19,6 +19,7 @@ pipeline_tag: image-text-to-text
 # MedVLThinker-3B-RL_m23k-RL_PMC
 
 Code: https://github.com/UCSC-VLAA/MedVLThinker
+Project Page: https://ucsc-vlaa.github.io/MedVLThinker/
 
 ## Model Description
 
@@ -34,13 +35,15 @@ This model has been trained using reinforcement learning on Med23k + PMC-VQA dat
 
 ## Usage
 
+Check here for demo images: https://github.com/UCSC-VLAA/MedVLThinker?tab=readme-ov-file#demo
+
 ```python
-from transformers import
+from transformers import Qwen2_5_VLForConditionalGeneration, AutoTokenizer, AutoProcessor
 from qwen_vl_utils import process_vision_info
 import torch
 
 # Load the model
-model =
+model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     "UCSC-VLAA/MedVLThinker-3B-RL_m23k-RL_PMC",
     torch_dtype=torch.bfloat16,
     device_map="auto"
@@ -49,6 +52,10 @@ processor = AutoProcessor.from_pretrained("UCSC-VLAA/MedVLThinker-3B-RL_m23k-RL_
 
 # Example usage
 messages = [
+    {
+        "role": "system",
+        "content": "You will solve a problem/request. You should provide your thoughts within <think> </think> tags before providing the answer. Write your final answer within <answer> </answer> tags.",
+    },
     {
         "role": "user",
         "content": [
@@ -76,7 +83,7 @@ inputs = processor(
 inputs = inputs.to("cuda")
 
 # Inference
-generated_ids = model.generate(**inputs, max_new_tokens=
+generated_ids = model.generate(**inputs, max_new_tokens=2048, temperature=0.6, top_p=0.95, do_sample=True)
 generated_ids_trimmed = [
     out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
 ]
@@ -91,7 +98,7 @@ print(output_text)
 ```bibtex
 @article{medvlthinker2025,
     title={MedVLThinker: Simple Baselines for Multimodal Medical Reasoning},
-    author={
+    author={Huang, Xiaoke and Wu, Juncheng and Liu, Hui and Tang, Xianfeng and Zhou, Yuyin},
     journal={arXiv preprint},
     year={2025}
 }
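
The diff cuts off at the opening of the user turn, so the middle of the snippet (the message content, chat-template rendering, and preprocessing) is not shown here. For orientation, a minimal sketch of that middle section under the standard Qwen2.5-VL / `qwen_vl_utils` conventions; the image path and question are illustrative placeholders, not files or prompts from the repo:

```python
from transformers import AutoProcessor
from qwen_vl_utils import process_vision_info

processor = AutoProcessor.from_pretrained("UCSC-VLAA/MedVLThinker-3B-RL_m23k-RL_PMC")

messages = [
    # System prompt added in this commit (see the third hunk above).
    {
        "role": "system",
        "content": "You will solve a problem/request. You should provide your thoughts within <think> </think> tags before providing the answer. Write your final answer within <answer> </answer> tags.",
    },
    {
        "role": "user",
        "content": [
            {"type": "image", "image": "demo.png"},  # placeholder path, not a repo file
            {"type": "text", "text": "What abnormality does this image show?"},  # placeholder question
        ],
    },
]

# Standard Qwen2.5-VL preprocessing: render the chat template to a prompt
# string, collect the image tensors, and batch both for the model.
text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
image_inputs, video_inputs = process_vision_info(messages)
inputs = processor(
    text=[text],
    images=image_inputs,
    videos=video_inputs,
    padding=True,
    return_tensors="pt",
)
```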
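The system prompt added in this commit asks the model to wrap its reasoning in `<think>` tags and its final answer in `<answer>` tags, so callers typically strip the reasoning before display or scoring. A minimal sketch, assuming the tags appear as prompted; `extract_answer` is an illustrative helper, not part of the MedVLThinker codebase:

```python
import re

def extract_answer(output_text: str) -> str:
    """Return the text inside <answer>...</answer>, or the raw output if the tags are absent."""
    match = re.search(r"<answer>(.*?)</answer>", output_text, flags=re.DOTALL)
    return match.group(1).strip() if match else output_text.strip()

# Example with a stand-in for the decoded model output:
output_text = "<think>The opacity sits in the right lower lobe.</think> <answer>B. Pneumonia</answer>"
print(extract_answer(output_text))  # -> "B. Pneumonia"
```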