update app (#9)
update app (81625108125ebbde9bd434ab19f47ea6fb62b9b8)
app.py
CHANGED
@@ -56,10 +56,10 @@ model_q = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     torch_dtype=torch.float16
 ).to(device).eval()
 
-# Load
-MODEL_ID_Y = "
+# Load Lumian2-VLR-7B-Thinking
+MODEL_ID_Y = "prithivMLmods/Lumian2-VLR-7B-Thinking"
 processor_y = AutoProcessor.from_pretrained(MODEL_ID_Y, trust_remote_code=True)
-model_y =
+model_y = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_Y,
     trust_remote_code=True,
     torch_dtype=torch.float16
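For reference, the loading pattern this hunk adds can be reproduced standalone. A minimal sketch, assuming a recent transformers build with Qwen2.5-VL support; the model ID and from_pretrained arguments come straight from the diff, while the imports and device selection are assumptions mirroring the surrounding app.py context:

```python
import torch
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration

# Model ID taken from the + lines above; the device choice is an assumption.
MODEL_ID_Y = "prithivMLmods/Lumian2-VLR-7B-Thinking"
device = "cuda" if torch.cuda.is_available() else "cpu"

# Same pattern as the model_q block above: half precision, eval mode, one device.
processor_y = AutoProcessor.from_pretrained(MODEL_ID_Y, trust_remote_code=True)
model_y = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_ID_Y,
    trust_remote_code=True,
    torch_dtype=torch.float16
).to(device).eval()
```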
@@ -106,7 +106,7 @@ def generate_image(model_name: str, text: str, image: Image.Image,
     elif model_name == "Qwen2.5-VL-7B-Abliterated-Caption-it":
         processor = processor_q
         model = model_q
-    elif model_name == "
+    elif model_name == "Lumian2-VLR-7B-Thinking":
         processor = processor_y
         model = model_y
     else:
@@ -163,7 +163,7 @@ def generate_video(model_name: str, text: str, video_path: str,
     elif model_name == "Qwen2.5-VL-7B-Abliterated-Caption-it":
         processor = processor_q
         model = model_q
-    elif model_name == "
+    elif model_name == "Lumian2-VLR-7B-Thinking":
         processor = processor_y
         model = model_y
     else:
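Both dispatch hunks (generate_image above and generate_video here) only select which processor/model pair to use; the generation itself presumably follows the standard Qwen2.5-VL chat-template flow. A hedged sketch of the image path under that assumption; the message format, max_new_tokens value, and the helper name run_image_inference are illustrative, not lines from this diff:

```python
from PIL import Image

def run_image_inference(processor, model, text: str, image: Image.Image) -> str:
    # Standard Qwen2.5-VL chat format: one image plus a text instruction.
    messages = [{
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": text},
        ],
    }]
    prompt = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = processor(
        text=[prompt], images=[image], return_tensors="pt"
    ).to(model.device)
    output_ids = model.generate(**inputs, max_new_tokens=512)
    # Decode only the newly generated tokens, not the echoed prompt.
    trimmed = output_ids[:, inputs["input_ids"].shape[1]:]
    return processor.batch_decode(trimmed, skip_special_tokens=True)[0]
```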
@@ -280,7 +280,7 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
     markdown_output = gr.Markdown()
 
     model_choice = gr.Radio(
-        choices=["Qwen2.5-VL-7B-Instruct", "Qwen2.5-VL-3B-Instruct", "
+        choices=["Qwen2.5-VL-7B-Instruct", "Qwen2.5-VL-3B-Instruct", "Lumian2-VLR-7B-Thinking", "Qwen2.5-VL-7B-Abliterated-Caption-it"],
         label="Select Model",
         value="Qwen2.5-VL-7B-Instruct"
     )
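The new radio choice only takes effect because the Radio's current value is forwarded to the generate functions as model_name. A minimal, self-contained sketch of that wiring, with a stub in place of app.py's real generate_image; all component names besides model_choice and markdown_output are hypothetical, and the actual click bindings sit outside this diff's hunks:

```python
import gradio as gr
from PIL import Image

def generate_image(model_name: str, text: str, image: Image.Image) -> str:
    # Stub standing in for app.py's generate_image; the real function
    # dispatches on model_name exactly as in the elif chains above.
    return f"[{model_name}] would caption the image for: {text}"

with gr.Blocks() as demo:
    model_choice = gr.Radio(
        choices=[
            "Qwen2.5-VL-7B-Instruct",
            "Qwen2.5-VL-3B-Instruct",
            "Lumian2-VLR-7B-Thinking",
            "Qwen2.5-VL-7B-Abliterated-Caption-it",
        ],
        label="Select Model",
        value="Qwen2.5-VL-7B-Instruct",
    )
    text_in = gr.Textbox(label="Query")   # hypothetical component names
    image_in = gr.Image(type="pil")
    run_btn = gr.Button("Run")
    markdown_output = gr.Markdown()

    # The Radio's value arrives positionally as model_name, which the
    # elif chains in generate_image/generate_video then match on.
    run_btn.click(
        generate_image,
        inputs=[model_choice, text_in, image_in],
        outputs=markdown_output,
    )

demo.launch()
```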
@@ -294,7 +294,7 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
     """
     )
 
-    gr.Markdown("> [
+    gr.Markdown("> [Lumian2-VLR-7B-Thinking](https://huggingface.co/prithivMLmods/Lumian2-VLR-7B-Thinking): The Lumian2-VLR-7B-Thinking model is a high-fidelity vision-language reasoning (experimental model) system designed for fine-grained multimodal understanding. Built on Qwen2.5-VL-7B-Instruct, this model enhances image captioning, sampled video reasoning, and document comprehension through explicit grounded reasoning. It produces structured reasoning traces aligned with visual coordinates, enabling explainable multimodal reasoning.")
 
     gr.Markdown(">⚠️note: all the models in space are not guaranteed to perform well in video inference use cases.")