Update README.md
README.md CHANGED
@@ -164,11 +164,44 @@ model = GPTQModel.load(
     torch_dtype=torch.float16,
     attn_implementation="flash_attention_2"
 )
+
+
+from qwen_omni_utils import process_mm_info
+processor = Qwen2_5OmniProcessor.from_pretrained(model_path)
+# Inference helper: wrap the prompt and video in a chat message, generate, and decode
+def inference(video_path, prompt, sys_prompt):
+    messages = [
+        {"role": "system", "content": sys_prompt},
+        {"role": "user", "content": [
+            {"type": "text", "text": prompt},
+            {"type": "video", "video": video_path},
+        ]
+        },
+    ]
+    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    # image_inputs, video_inputs = process_vision_info([messages])
+    audios, images, videos = process_mm_info(messages, use_audio_in_video=False)
+    inputs = processor(text=text, audios=audios, images=images, videos=videos, return_tensors="pt", padding=True)
+    inputs = inputs.to(model.device).to(model.dtype)
+
+    output = model.generate(**inputs, use_audio_in_video=False, return_audio=False)
+
+    text = processor.batch_decode(output, skip_special_tokens=True, clean_up_tokenization_spaces=False)
+    return text
+
+video_path = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2.5-Omni/screen.mp4"
+prompt = "Please translate the abstract of the paper into Chinese."
+
+# display(Video(video_path, width=640, height=360))
+
+# Use a local Hugging Face model for inference.
+response = inference(video_path, prompt=prompt, sys_prompt="You are a helpful assistant.")
+print(response[0])
 ```
 
+
+
 ## Notes
 
-- Ensure you have the correct dependencies installed
-- Modify the `model_path` to match your local model location
 - The code provides both commented-out FP model loading and GPTQ model loading
+
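
For reference, the two loading paths that the remaining Notes bullet refers to look roughly like the sketch below. This is a minimal reconstruction, not part of this commit: the `Qwen2_5OmniForConditionalGeneration` class name and the example `model_path` value are assumptions based on the transformers and GPTQModel documentation.

```
import torch
from gptqmodel import GPTQModel
from transformers import Qwen2_5OmniProcessor

# Hypothetical path; point this at your local checkpoint or a Hub id.
model_path = "Qwen/Qwen2.5-Omni-7B-GPTQ-Int4"

# FP (full-precision) loading path, kept commented out in the README.
# Assumes transformers ships Qwen2_5OmniForConditionalGeneration for this family.
# from transformers import Qwen2_5OmniForConditionalGeneration
# model = Qwen2_5OmniForConditionalGeneration.from_pretrained(
#     model_path,
#     torch_dtype=torch.float16,
#     attn_implementation="flash_attention_2",
# )

# GPTQ loading path, matching the call shown in the hunk header above.
model = GPTQModel.load(
    model_path,
    torch_dtype=torch.float16,
    attn_implementation="flash_attention_2",
)

processor = Qwen2_5OmniProcessor.from_pretrained(model_path)
```

Only one of the two paths should be active at a time; both hand the same `model_path` to `Qwen2_5OmniProcessor`, so the `inference` helper above works unchanged with either.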
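One detail worth calling out in the helper: `use_audio_in_video` is passed to both `process_mm_info` and `model.generate`, and per the Qwen2.5-Omni usage notes the two values must agree. A minimal sketch of the same calls with the video's audio track enabled, reusing the `messages`, `text`, `processor`, and `model` variables from the example above:

```
# Enable the video's audio track. The flag must be set consistently in
# preprocessing and generation (it appears as a pair in the helper above).
audios, images, videos = process_mm_info(messages, use_audio_in_video=True)
inputs = processor(text=text, audios=audios, images=images, videos=videos, return_tensors="pt", padding=True)
inputs = inputs.to(model.device).to(model.dtype)
output = model.generate(**inputs, use_audio_in_video=True, return_audio=False)
```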