Update README.md
README.md CHANGED
@@ -164,11 +164,44 @@ model = GPTQModel.load(
     torch_dtype=torch.float16,
     attn_implementation="flash_attention_2"
 )
+
+
+from qwen_omni_utils import process_mm_info
+processor = Qwen2_5OmniProcessor.from_pretrained(model_path)
+# Inference helper: wrap the prompt and video in a chat message, generate, and decode
+def inference(video_path, prompt, sys_prompt):
+    messages = [
+        {"role": "system", "content": sys_prompt},
+        {"role": "user", "content": [
+            {"type": "text", "text": prompt},
+            {"type": "video", "video": video_path},
+        ]
+        },
+    ]
+    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    # image_inputs, video_inputs = process_vision_info([messages])
+    audios, images, videos = process_mm_info(messages, use_audio_in_video=False)
+    inputs = processor(text=text, audios=audios, images=images, videos=videos, return_tensors="pt", padding=True)
+    inputs = inputs.to(model.device).to(model.dtype)
+
+    output = model.generate(**inputs, use_audio_in_video=False, return_audio=False)
+
+    text = processor.batch_decode(output, skip_special_tokens=True, clean_up_tokenization_spaces=False)
+    return text
+
+video_path = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2.5-Omni/screen.mp4"
+prompt = "Please translate the abstract of the paper into Chinese."
+
+# display(Video(video_path, width=640, height=360))
+
+# Use a local Hugging Face model for inference.
+response = inference(video_path, prompt=prompt, sys_prompt="You are a helpful assistant.")
+print(response[0])
 ```
 
+
+
 ## Notes
 
-- Ensure you have the correct dependencies installed
-- Modify the `model_path` to match your local model location
 - The code provides both commented-out FP model loading and GPTQ model loading
+
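
For reference, the two loading paths that the remaining Notes bullet refers to look roughly like the sketch below. This is a minimal reconstruction, not part of this commit: the `Qwen2_5OmniForConditionalGeneration` class name and the example `model_path` value are assumptions based on the transformers and GPTQModel documentation.

```
import torch
from gptqmodel import GPTQModel
from transformers import Qwen2_5OmniProcessor

# Hypothetical path; point this at your local checkpoint or a Hub id.
model_path = "Qwen/Qwen2.5-Omni-7B-GPTQ-Int4"

# FP (full-precision) loading path, kept commented out in the README.
# Assumes transformers ships Qwen2_5OmniForConditionalGeneration for this family.
# from transformers import Qwen2_5OmniForConditionalGeneration
# model = Qwen2_5OmniForConditionalGeneration.from_pretrained(
#     model_path,
#     torch_dtype=torch.float16,
#     attn_implementation="flash_attention_2",
# )

# GPTQ loading path, matching the call shown in the hunk header above.
model = GPTQModel.load(
    model_path,
    torch_dtype=torch.float16,
    attn_implementation="flash_attention_2",
)

processor = Qwen2_5OmniProcessor.from_pretrained(model_path)
```

Only one of the two paths should be active at a time; both hand the same `model_path` to `Qwen2_5OmniProcessor`, so the `inference` helper above works unchanged with either.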
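One detail worth calling out in the helper: `use_audio_in_video` is passed to both `process_mm_info` and `model.generate`, and per the Qwen2.5-Omni usage notes the two values must agree. A minimal sketch of the same calls with the video's audio track enabled, reusing the `messages`, `text`, `processor`, and `model` variables from the example above:

```
# Enable the video's audio track. The flag must be set consistently in
# preprocessing and generation (it appears as a pair in the helper above).
audios, images, videos = process_mm_info(messages, use_audio_in_video=True)
inputs = processor(text=text, audios=audios, images=images, videos=videos, return_tensors="pt", padding=True)
inputs = inputs.to(model.device).to(model.dtype)
output = model.generate(**inputs, use_audio_in_video=True, return_audio=False)
```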