chentianqi committed on
Commit 33607ba · verified · 1 Parent(s): cb720dc

Update README.md

Files changed (1): README.md +36 -3
README.md CHANGED
@@ -164,11 +164,44 @@ model = GPTQModel.load(
    torch_dtype=torch.float16,
    attn_implementation="flash_attention_2"
)
+
+
+ from qwen_omni_utils import process_mm_info
+ processor = Qwen2_5OmniProcessor.from_pretrained(model_path)
+ # @title inference function
+ def inference(video_path, prompt, sys_prompt):
+     messages = [
+         {"role": "system", "content": sys_prompt},
+         {"role": "user", "content": [
+             {"type": "text", "text": prompt},
+             {"type": "video", "video": video_path},
+         ]
+         },
+     ]
+     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+     # image_inputs, video_inputs = process_vision_info([messages])
+     audios, images, videos = process_mm_info(messages, use_audio_in_video=False)
+     inputs = processor(text=text, audios=audios, images=images, videos=videos, return_tensors="pt", padding=True)
+     inputs = inputs.to(model.device).to(model.dtype)
+
+     output = model.generate(**inputs, use_audio_in_video=False, return_audio=False)
+
+     text = processor.batch_decode(output, skip_special_tokens=True, clean_up_tokenization_spaces=False)
+     return text
+
+ video_path = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2.5-Omni/screen.mp4"
+ prompt = "Please translate the abstract of the paper into Chinese."
+
+ # display(Video(video_path, width=640, height=360))
+
+ # Use a local Hugging Face model for inference.
+ response = inference(video_path, prompt=prompt, sys_prompt="You are a helpful assistant.")
+ print(response[0])
```

+
+
## Notes

- - Ensure you have the correct dependencies installed
- - Modify the `model_path` to match your local model location
- The code provides both commented-out FP model loading and GPTQ model loading
- - Customize the device map and torch dtype as needed for your specific setup
+
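
The hunk above shows only the tail of the `GPTQModel.load(...)` call, and the `inference` helper assumes that `model_path` and `model` are defined earlier in the README. A minimal, self-contained sketch of that surrounding context might look like the following; the checkpoint path and the `device_map` value are assumptions, since the hunk does not show them.

```python
import torch
from gptqmodel import GPTQModel
from transformers import Qwen2_5OmniProcessor

# Hypothetical local checkpoint path; the README defines the real value
# in a part of the file this hunk does not show.
model_path = "./Qwen2.5-Omni-7B-GPTQ-Int4"

model = GPTQModel.load(
    model_path,
    device_map="auto",                        # assumption: not visible in this hunk
    torch_dtype=torch.float16,                # from the diff above
    attn_implementation="flash_attention_2",  # from the diff above
)

processor = Qwen2_5OmniProcessor.from_pretrained(model_path)
```

With that context in place, the `inference` helper added by this commit accepts either a URL or a local path, for example:

```python
response = inference(
    "./my_clip.mp4",  # hypothetical local video file
    prompt="Summarize the video in one paragraph.",
    sys_prompt="You are a helpful assistant.",
)
print(response[0])
```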