UCSC-VLAA
/

MedVLThinker-3B-RL_m23k-RL_PMC

@@ -19,6 +19,7 @@ pipeline_tag: image-text-to-text
 # MedVLThinker-3B-RL_m23k-RL_PMC
 Code: https://github.com/UCSC-VLAA/MedVLThinker
 ## Model Description
@@ -34,13 +35,15 @@ This model has been trained using reinforcement learning on Med23k + PMC-VQA dat
 ## Usage
 ```python
-from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
 from qwen_vl_utils import process_vision_info
 import torch
 # Load the model
-model = Qwen2VLForConditionalGeneration.from_pretrained(
     "UCSC-VLAA/MedVLThinker-3B-RL_m23k-RL_PMC",
     torch_dtype=torch.bfloat16,
     device_map="auto"
@@ -49,6 +52,10 @@ processor = AutoProcessor.from_pretrained("UCSC-VLAA/MedVLThinker-3B-RL_m23k-RL_
 # Example usage
 messages = [
     {
         "role": "user",
         "content": [
@@ -76,7 +83,7 @@ inputs = processor(
 inputs = inputs.to("cuda")
 # Inference
-generated_ids = model.generate(**inputs, max_new_tokens=128)
 generated_ids_trimmed = [
     out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
 ]
@@ -91,7 +98,7 @@ print(output_text)
 ```bibtex
 @article{medvlthinker2025,
   title={MedVLThinker: Simple Baselines for Multimodal Medical Reasoning},
-  author={Your Team},
   journal={arXiv preprint},
   year={2025}
 }

 # MedVLThinker-3B-RL_m23k-RL_PMC
 Code: https://github.com/UCSC-VLAA/MedVLThinker
+Project Page: https://ucsc-vlaa.github.io/MedVLThinker/
 ## Model Description
 ## Usage
+See the demo images here: https://github.com/UCSC-VLAA/MedVLThinker?tab=readme-ov-file#demo
 ```python
+from transformers import Qwen2_5_VLForConditionalGeneration, AutoTokenizer, AutoProcessor
 from qwen_vl_utils import process_vision_info
 import torch
 # Load the model
+model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     "UCSC-VLAA/MedVLThinker-3B-RL_m23k-RL_PMC",
     torch_dtype=torch.bfloat16,
     device_map="auto"
 # Example usage
 messages = [
+    {
+        "role": "system",
+        "content": "You will solve a problem/request. You should provide your thoughts within <think> </think> tags before providing the answer. Write your final answer within <answer> </answer> tags.",
+    },
     {
         "role": "user",
         "content": [
 inputs = inputs.to("cuda")
 # Inference
+generated_ids = model.generate(**inputs, max_new_tokens=2048, temperature=0.6, top_p=0.95, do_sample=True)
 generated_ids_trimmed = [
     out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
 ]
 ```bibtex
 @article{medvlthinker2025,
   title={MedVLThinker: Simple Baselines for Multimodal Medical Reasoning},
+  author={Huang, Xiaoke and Wu, Juncheng and Liu, Hui and Tang, Xianfeng and Zhou, Yuyin},
   journal={arXiv preprint},
   year={2025}
 }