Spaces:

prithivMLmods
/

Qwen3-VL-HF-Demo

Running on Zero

App Files Files Community

prithivMLmods committed 19 days ago

Commit

2bb8d53

verified ·

1 Parent(s): e62211b

update app

Browse files

Files changed (1) hide show

app.py +16 -16

app.py CHANGED Viewed

@@ -51,7 +51,7 @@ class ThistleTheme(Soft):
         neutral_hue: colors.Color | str = colors.slate,
         text_size: sizes.Size | str = sizes.text_lg,
         font: fonts.Font | str | Iterable[fonts.Font | str] = (
-            fonts.GoogleFont("Inconsolata"), "Arial", "sans-serif",
         ),
         font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
             fonts.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace",
@@ -372,17 +372,17 @@ def generate_gif(text: str, gif_path: str, max_new_tokens: int = 1024, temperatu
         buffer = buffer.replace("<|im_end|>", "")
         time.sleep(0.01)
         yield buffer, buffer
-image_examples = [["Perform OCR on the image precisely and reconstruct it correctly...", "examples/images/1.jpg"],
                   ["Caption the image. Describe the safety measures shown in the image. Conclude whether the situation is (safe or unsafe)...", "examples/images/2.jpg"],
                   ["Solve the problem...", "examples/images/3.png"]]
-video_examples = [["Explain the Ad video in detail.", "examples/videos/1.mp4"],
                   ["Explain the video in detail.", "examples/videos/2.mp4"]]
-pdf_examples = [["Extract the content precisely.", "examples/pdfs/doc1.pdf"],
                 ["Analyze and provide a short report.", "examples/pdfs/doc2.pdf"]]
 gif_examples = [["Describe this GIF.", "examples/gifs/1.gif"],
                 ["Describe this GIF.", "examples/gifs/2.gif"]]
-caption_examples = [["https://huggingface.co/datasets/merve/vlm_test_images/resolve/main/candy.JPG"],
                     ["examples/captions/2.png"], ["examples/captions/3.png"]]
 with gr.Blocks(theme=thistle_theme, css=css) as demo:
@@ -440,21 +440,21 @@ with gr.Blocks(theme=thistle_theme, css=css) as demo:
             output = gr.Textbox(label="Raw Output Stream", interactive=False, lines=14, show_copy_button=True)
             with gr.Accordion("(Result.md)", open=False):
                 markdown_output = gr.Markdown(label="(Result.Md)")
-    image_submit.click(fn=generate_image,
-                       inputs=[image_query, image_upload, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
                        outputs=[output, markdown_output])
-    video_submit.click(fn=generate_video,
-                       inputs=[video_query, video_upload, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
                        outputs=[output, markdown_output])
     pdf_submit.click(fn=generate_pdf,
-                     inputs=[pdf_query, pdf_state, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
                      outputs=[output, markdown_output])
-    gif_submit.click(fn=generate_gif,
-                     inputs=[gif_query, gif_upload, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
                      outputs=[output, markdown_output])
-    caption_submit.click(fn=generate_caption,
-                         inputs=[caption_image_upload, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
                          outputs=[output, markdown_output])
     pdf_upload.change(fn=load_and_preview_pdf, inputs=[pdf_upload], outputs=[pdf_preview_img, pdf_state, page_info])

         neutral_hue: colors.Color | str = colors.slate,
         text_size: sizes.Size | str = sizes.text_lg,
         font: fonts.Font | str | Iterable[fonts.Font | str] = (
+            fonts.GoogleFont("Outfit"), "Arial", "sans-serif",
         ),
         font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
             fonts.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace",
         buffer = buffer.replace("<|im_end|>", "")
         time.sleep(0.01)
         yield buffer, buffer
+image_examples = [["Perform OCR on the image precisely and reconstruct it correctly...", "examples/images/1.jpg"],
                   ["Caption the image. Describe the safety measures shown in the image. Conclude whether the situation is (safe or unsafe)...", "examples/images/2.jpg"],
                   ["Solve the problem...", "examples/images/3.png"]]
+video_examples = [["Explain the Ad video in detail.", "examples/videos/1.mp4"],
                   ["Explain the video in detail.", "examples/videos/2.mp4"]]
+pdf_examples = [["Extract the content precisely.", "examples/pdfs/doc1.pdf"],
                 ["Analyze and provide a short report.", "examples/pdfs/doc2.pdf"]]
 gif_examples = [["Describe this GIF.", "examples/gifs/1.gif"],
                 ["Describe this GIF.", "examples/gifs/2.gif"]]
+caption_examples = [["https://huggingface.co/datasets/merve/vlm_test_images/resolve/main/candy.JPG"],
                     ["examples/captions/2.png"], ["examples/captions/3.png"]]
 with gr.Blocks(theme=thistle_theme, css=css) as demo:
             output = gr.Textbox(label="Raw Output Stream", interactive=False, lines=14, show_copy_button=True)
             with gr.Accordion("(Result.md)", open=False):
                 markdown_output = gr.Markdown(label="(Result.Md)")
+    image_submit.click(fn=generate_image,
+                       inputs=[image_query, image_upload, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
                        outputs=[output, markdown_output])
+    video_submit.click(fn=generate_video,
+                       inputs=[video_query, video_upload, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
                        outputs=[output, markdown_output])
     pdf_submit.click(fn=generate_pdf,
+                     inputs=[pdf_query, pdf_state, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
                      outputs=[output, markdown_output])
+    gif_submit.click(fn=generate_gif,
+                     inputs=[gif_query, gif_upload, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
                      outputs=[output, markdown_output])
+    caption_submit.click(fn=generate_caption,
+                         inputs=[caption_image_upload, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
                          outputs=[output, markdown_output])
     pdf_upload.change(fn=load_and_preview_pdf, inputs=[pdf_upload], outputs=[pdf_preview_img, pdf_state, page_info])