TongkunGuan committed on
Commit
0afd727
·
verified ·
1 Parent(s): 63e22db

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -7
app.py CHANGED
@@ -11,11 +11,12 @@ from utils import generate_similiarity_map, post_process, load_tokenizer, build_
11
  from utils import IMAGENET_MEAN, IMAGENET_STD
12
  from internvl.train.dataset import dynamic_preprocess
13
  from internvl.model.internvl_chat import InternVLChatModel
 
14
 
15
  # Model configuration
16
  CHECKPOINTS = {
17
- "TokenFD-4096-English-seg": "TongkunGuan/TokenFD_4096_English_seg",
18
- "TokenFD-2048-Bilingual-seg": "TongkunGuan/TokenFD_2048_Bilingual_seg",
19
  }
20
 
21
  # Global variables
@@ -24,9 +25,10 @@ current_vis = []
24
  current_bpe = []
25
  current_index = 0
26
 
 
27
  def load_model(check_type):
28
- device = torch.device("cpu")
29
-
30
  if check_type == 'R50':
31
  tokenizer = load_tokenizer('tokenizer_path')
32
  model = build_model(argparse.Namespace()).eval()
@@ -39,7 +41,7 @@ def load_model(check_type):
39
  model.load_state_dict(torch.load(CHECKPOINTS['R50_siglip'], map_location='cpu')['model'])
40
  transform = build_transform_R50(normalize_type='imagenet')
41
 
42
- elif 'TokenOCR' in check_type:
43
  model_path = CHECKPOINTS[check_type]
44
  tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True, use_fast=False, use_auth_token=HF_TOKEN)
45
  model = InternVLChatModel.from_pretrained(model_path, torch_dtype=torch.bfloat16).eval()
@@ -121,9 +123,9 @@ with gr.Blocks(title="BPE Visualization Demo") as demo:
121
  with gr.Row():
122
  with gr.Column(scale=0.5):
123
  model_type = gr.Dropdown(
124
- choices=["TokenOCR-4096-English-seg", "TokenOCR-2048-Bilingual-seg", "R50", "R50_siglip"],
125
  label="Select model type",
126
- value="R50" # 设置默认值为第一个选项
127
  )
128
  image_input = gr.Image(label="Upload images", type="pil")
129
  text_input = gr.Textbox(label="Input text")
@@ -155,6 +157,7 @@ with gr.Blocks(title="BPE Visualization Demo") as demo:
155
  bpe_display = gr.Markdown("Current BPE: ", visible=False)
156
 
157
  # Event handling
 
158
  def on_run_clicked(model_type, image, text):
159
  global current_vis, current_bpe, current_index
160
  current_index = 0 # Reset index when new image is processed
 
11
  from utils import IMAGENET_MEAN, IMAGENET_STD
12
  from internvl.train.dataset import dynamic_preprocess
13
  from internvl.model.internvl_chat import InternVLChatModel
14
+ import spaces
15
 
16
  # Model configuration
17
  CHECKPOINTS = {
18
+ "TokenFD_4096_English_seg": "TongkunGuan/TokenFD_4096_English_seg",
19
+ "TokenFD_2048_Bilingual_seg": "TongkunGuan/TokenFD_2048_Bilingual_seg",
20
  }
21
 
22
  # Global variables
 
25
  current_bpe = []
26
  current_index = 0
27
 
28
+
29
  def load_model(check_type):
30
+ # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
31
+ device = torch.device("cuda")
32
  if check_type == 'R50':
33
  tokenizer = load_tokenizer('tokenizer_path')
34
  model = build_model(argparse.Namespace()).eval()
 
41
  model.load_state_dict(torch.load(CHECKPOINTS['R50_siglip'], map_location='cpu')['model'])
42
  transform = build_transform_R50(normalize_type='imagenet')
43
 
44
+ elif 'TokenFD' in check_type:
45
  model_path = CHECKPOINTS[check_type]
46
  tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True, use_fast=False, use_auth_token=HF_TOKEN)
47
  model = InternVLChatModel.from_pretrained(model_path, torch_dtype=torch.bfloat16).eval()
 
123
  with gr.Row():
124
  with gr.Column(scale=0.5):
125
  model_type = gr.Dropdown(
126
+ choices=["TokenOCR_4096_English_seg", "TokenOCR_2048_Bilingual_seg", "R50", "R50_siglip"],
127
  label="Select model type",
128
+ value="TokenOCR_4096_English_seg" # 设置默认值为第一个选项
129
  )
130
  image_input = gr.Image(label="Upload images", type="pil")
131
  text_input = gr.Textbox(label="Input text")
 
157
  bpe_display = gr.Markdown("Current BPE: ", visible=False)
158
 
159
  # Event handling
160
+ @spaces.GPU
161
  def on_run_clicked(model_type, image, text):
162
  global current_vis, current_bpe, current_index
163
  current_index = 0 # Reset index when new image is processed