prithivMLmods committed · verified
Commit 6833cd8 · 1 Parent(s): 4fd729b
Files changed (1)
  1. app.py +7 -7
app.py CHANGED
@@ -56,10 +56,10 @@ model_q = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     torch_dtype=torch.float16
 ).to(device).eval()
 
-# Load R-4B
-MODEL_ID_Y = "YannQi/R-4B"
+# Load Lumian2-VLR-7B-Thinking
+MODEL_ID_Y = "prithivMLmods/Lumian2-VLR-7B-Thinking"
 processor_y = AutoProcessor.from_pretrained(MODEL_ID_Y, trust_remote_code=True)
-model_y = AutoModel.from_pretrained(
+model_y = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_Y,
     trust_remote_code=True,
     torch_dtype=torch.float16
@@ -106,7 +106,7 @@ def generate_image(model_name: str, text: str, image: Image.Image,
     elif model_name == "Qwen2.5-VL-7B-Abliterated-Caption-it":
         processor = processor_q
         model = model_q
-    elif model_name == "R-4B":
+    elif model_name == "Lumian2-VLR-7B-Thinking":
         processor = processor_y
         model = model_y
     else:
@@ -163,7 +163,7 @@ def generate_video(model_name: str, text: str, video_path: str,
     elif model_name == "Qwen2.5-VL-7B-Abliterated-Caption-it":
         processor = processor_q
         model = model_q
-    elif model_name == "R-4B":
+    elif model_name == "Lumian2-VLR-7B-Thinking":
         processor = processor_y
         model = model_y
     else:
@@ -280,7 +280,7 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
         markdown_output = gr.Markdown()
 
         model_choice = gr.Radio(
-            choices=["Qwen2.5-VL-7B-Instruct", "Qwen2.5-VL-3B-Instruct", "R-4B", "Qwen2.5-VL-7B-Abliterated-Caption-it"],
+            choices=["Qwen2.5-VL-7B-Instruct", "Qwen2.5-VL-3B-Instruct", "Lumian2-VLR-7B-Thinking", "Qwen2.5-VL-7B-Abliterated-Caption-it"],
             label="Select Model",
             value="Qwen2.5-VL-7B-Instruct"
         )
@@ -294,7 +294,7 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
         """
         )
 
-    gr.Markdown("> [R-4B](https://huggingface.co/YannQi/R-4B): R-4B is a multimodal large language model designed for adaptive auto-thinking, able to intelligently switch between detailed reasoning and direct responses to optimize quality and efficiency. It achieves state-of-the-art performance and efficiency with user-controllable response modes, making it ideal for both simple and complex tasks.")
+    gr.Markdown("> [Lumian2-VLR-7B-Thinking](https://huggingface.co/prithivMLmods/Lumian2-VLR-7B-Thinking): Lumian2-VLR-7B-Thinking is an experimental high-fidelity vision-language reasoning model designed for fine-grained multimodal understanding. Built on Qwen2.5-VL-7B-Instruct, it enhances image captioning, sampled video reasoning, and document comprehension through explicit grounded reasoning, producing structured reasoning traces aligned with visual coordinates for explainable multimodal reasoning.")
 
     gr.Markdown("> ⚠️ Note: the models in this Space are not guaranteed to perform well in video inference use cases.")
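For context, a minimal standalone sketch of the wiring this commit introduces: loading the swapped-in checkpoint with Qwen2_5_VLForConditionalGeneration (replacing AutoModel, since the new model is a Qwen2.5-VL fine-tune) and routing one image-captioning request through it. The prompt, image path, and generation settings below are illustrative assumptions, not part of the commit; the loader and processor calls mirror the ones in app.py above.

# Sketch of the new model wiring from this commit; prompt/image are placeholders.
import torch
from PIL import Image
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration

MODEL_ID_Y = "prithivMLmods/Lumian2-VLR-7B-Thinking"
device = "cuda" if torch.cuda.is_available() else "cpu"

# Same loading pattern as app.py: processor + Qwen2.5-VL generation head in fp16.
processor_y = AutoProcessor.from_pretrained(MODEL_ID_Y, trust_remote_code=True)
model_y = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_ID_Y,
    trust_remote_code=True,
    torch_dtype=torch.float16
).to(device).eval()

image = Image.open("example.jpg")  # hypothetical input image
messages = [{"role": "user", "content": [
    {"type": "image"},
    {"type": "text", "text": "Describe this image."},  # illustrative prompt
]}]
prompt = processor_y.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
inputs = processor_y(text=[prompt], images=[image], return_tensors="pt").to(device)

with torch.inference_mode():
    output_ids = model_y.generate(**inputs, max_new_tokens=128)

# Strip the prompt tokens and decode only the newly generated text.
generated = output_ids[:, inputs["input_ids"].shape[1]:]
print(processor_y.batch_decode(generated, skip_special_tokens=True)[0])

The same processor_y/model_y pair is what generate_image and generate_video select when the radio choice is "Lumian2-VLR-7B-Thinking", so this one loading change covers both inference paths.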