mrdbourke committed
Commit 010417c · verified · 1 Parent(s): 7f4399b

Update app.py

Files changed (1):
  1. app.py +9 -2

app.py CHANGED
@@ -684,7 +684,7 @@ def infer_on_image(input_image):
 
 description = f"""Demo based on example [Qwen2.5-VL spatial notebook](https://github.com/QwenLM/Qwen2.5-VL/blob/main/cookbooks/spatial_understanding.ipynb) for detecting foods and drinks in images with bounding boxes. Input an image of food/drink for bounding boxes to be detected. If no food is present in an image the model should return 'no foods found'.\n
 One prediction will use thinking tags, e.g. <think>...</think> to try and describe what's in the image. The other will directly predict a JSON of bounding box coordinates and labels.
-Boxes may not be as accurate as a dedicated object detection model but the benefit here is that they are class agnostic.
+Boxes may not be as accurate as a dedicated object detection model but the benefit here is that they are class agnostic (e.g. the model can detect a wide range of items despite never being explicitly trained on them).
 The foundation knowledge in Qwen2.5-VL (we are using [Qwen2.5-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct) in this demo) means it can detect a wide range of foods and drinks.
 See the app.py file for the different prompts used."""
 
@@ -697,6 +697,13 @@ demo = gr.Interface(fn=infer_on_image,
                     gr.Text(label="Raw output w/o thinking tags"),
                     gr.Text(label="Inference time w/o thinking tags")],
                     title="Qwen2.5-VL Food Detection 👁️🍔",
-                    description=description)
+                    description=description,
+                    # Examples come in the form of a list of lists, where each inner list contains elements to prefill the `inputs` parameter with
+                    examples=[
+                        ["examples/example_1.jpeg"],
+                        ["examples/example_2.jpeg"],
+                        ["examples/example_3.jpeg"]
+                    ],
+                    cache_examples=True)
 
 demo.launch(debug=True)
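
For context, below is a minimal, self-contained sketch of how `gr.Interface` consumes the `examples` and `cache_examples` parameters added in this commit. The `greet` function is a hypothetical stand-in for the app's actual inference function, not code from this repo:

```python
import gradio as gr

def greet(name):
    # Toy stand-in for the demo's real inference function.
    return f"Hello, {name}!"

demo = gr.Interface(fn=greet,
                    inputs=gr.Text(label="Name"),
                    outputs=gr.Text(label="Greeting"),
                    # One inner list per example row; one element per input component.
                    examples=[["Alice"], ["Bob"]],
                    # Precompute and store fn's output for each example row so
                    # clicking an example loads results without re-running fn.
                    cache_examples=True)

demo.launch()
```

With `cache_examples=True`, Gradio evaluates `fn` on each example up front and serves the stored outputs, so clicking one of the example food images in the demo shows predictions without re-running Qwen2.5-VL inference each time.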