xk-huang committed on
Commit
058ea06
·
verified ·
1 Parent(s): 414fc4d

Upload MedVLThinker-3B-RL_m23k-RL_PMC model weights

Browse files
Files changed (1) hide show
  1. README.md +11 -4
README.md CHANGED
@@ -19,6 +19,7 @@ pipeline_tag: image-text-to-text
19
  # MedVLThinker-3B-RL_m23k-RL_PMC
20
 
21
  Code: https://github.com/UCSC-VLAA/MedVLThinker
 
22
 
23
  ## Model Description
24
 
@@ -34,13 +35,15 @@ This model has been trained using reinforcement learning on Med23k + PMC-VQA dat
34
 
35
  ## Usage
36
 
 
 
37
  ```python
38
- from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
39
  from qwen_vl_utils import process_vision_info
40
  import torch
41
 
42
  # Load the model
43
- model = Qwen2VLForConditionalGeneration.from_pretrained(
44
  "UCSC-VLAA/MedVLThinker-3B-RL_m23k-RL_PMC",
45
  torch_dtype=torch.bfloat16,
46
  device_map="auto"
@@ -49,6 +52,10 @@ processor = AutoProcessor.from_pretrained("UCSC-VLAA/MedVLThinker-3B-RL_m23k-RL_
49
 
50
  # Example usage
51
  messages = [
 
 
 
 
52
  {
53
  "role": "user",
54
  "content": [
@@ -76,7 +83,7 @@ inputs = processor(
76
  inputs = inputs.to("cuda")
77
 
78
  # Inference
79
- generated_ids = model.generate(**inputs, max_new_tokens=128)
80
  generated_ids_trimmed = [
81
  out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
82
  ]
@@ -91,7 +98,7 @@ print(output_text)
91
  ```bibtex
92
  @article{medvlthinker2025,
93
  title={MedVLThinker: Simple Baselines for Multimodal Medical Reasoning},
94
- author={Your Team},
95
  journal={arXiv preprint},
96
  year={2025}
97
  }
 
19
  # MedVLThinker-3B-RL_m23k-RL_PMC
20
 
21
  Code: https://github.com/UCSC-VLAA/MedVLThinker
22
+ Project Page: https://ucsc-vlaa.github.io/MedVLThinker/
23
 
24
  ## Model Description
25
 
 
35
 
36
  ## Usage
37
 
38
+ Check here for demo images: https://github.com/UCSC-VLAA/MedVLThinker?tab=readme-ov-file#demo
39
+
40
  ```python
41
+ from transformers import Qwen2_5_VLForConditionalGeneration, AutoTokenizer, AutoProcessor
42
  from qwen_vl_utils import process_vision_info
43
  import torch
44
 
45
  # Load the model
46
+ model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
47
  "UCSC-VLAA/MedVLThinker-3B-RL_m23k-RL_PMC",
48
  torch_dtype=torch.bfloat16,
49
  device_map="auto"
 
52
 
53
  # Example usage
54
  messages = [
55
+ {
56
+ "role": "system",
57
+ "content": "You will solve a problem/request. You should provide your thoughts within <think> </think> tags before providing the answer. Write your final answer within <answer> </answer> tags.",
58
+ },
59
  {
60
  "role": "user",
61
  "content": [
 
83
  inputs = inputs.to("cuda")
84
 
85
  # Inference
86
+ generated_ids = model.generate(**inputs, max_new_tokens=2048, temperature=0.6, top_p=0.95, do_sample=True)
87
  generated_ids_trimmed = [
88
  out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
89
  ]
 
98
  ```bibtex
99
  @article{medvlthinker2025,
100
  title={MedVLThinker: Simple Baselines for Multimodal Medical Reasoning},
101
+ author={Huang, Xiaoke and Wu, Juncheng and Liu, Hui and Tang, Xianfeng and Zhou, Yuyin},
102
  journal={arXiv preprint},
103
  year={2025}
104
  }