Warn when system prompt is modified
Browse files- processing_midashenglm.py +45 -1
processing_midashenglm.py
CHANGED
|
@@ -1,10 +1,16 @@
|
|
|
|
|
|
|
|
| 1 |
from typing import Dict, List, Optional, Union, cast
|
| 2 |
|
| 3 |
import numpy as np
|
| 4 |
import torch
|
| 5 |
from transformers import Qwen2Tokenizer, Qwen2TokenizerFast, Wav2Vec2FeatureExtractor
|
| 6 |
from transformers.feature_extraction_utils import BatchFeature
|
| 7 |
-
from transformers.processing_utils import
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
from typing_extensions import Unpack
|
| 9 |
|
| 10 |
|
|
@@ -153,6 +159,44 @@ class MiDashengLMProcessor(ProcessorMixin):
|
|
| 153 |
f"Expected audio to be a numpy array, torch tensor, or string, but got {type(sample)}."
|
| 154 |
)
|
| 155 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
def __call__(
|
| 157 |
self,
|
| 158 |
text: Optional[List[str]] = None,
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
from collections.abc import Mapping
|
| 3 |
from typing import Dict, List, Optional, Union, cast
|
| 4 |
|
| 5 |
import numpy as np
|
| 6 |
import torch
|
| 7 |
from transformers import Qwen2Tokenizer, Qwen2TokenizerFast, Wav2Vec2FeatureExtractor
|
| 8 |
from transformers.feature_extraction_utils import BatchFeature
|
| 9 |
+
from transformers.processing_utils import (
|
| 10 |
+
AllKwargsForChatTemplate,
|
| 11 |
+
ProcessingKwargs,
|
| 12 |
+
ProcessorMixin,
|
| 13 |
+
)
|
| 14 |
from typing_extensions import Unpack
|
| 15 |
|
| 16 |
|
|
|
|
| 159 |
f"Expected audio to be a numpy array, torch tensor, or string, but got {type(sample)}."
|
| 160 |
)
|
| 161 |
|
| 162 |
+
def apply_chat_template(
    self,
    conversation: Union[list[dict[str, str]], list[list[dict[str, str]]]],
    chat_template: Optional[str] = None,
    **kwargs: Unpack[AllKwargsForChatTemplate],
) -> str:
    """Apply the chat template, warning when the default system prompt was changed.

    Inspects the first message of each conversation; if it is a ``system``
    message whose text differs from the model's default system prompt, a
    warning is logged (the conversation itself is never modified). All
    formatting is then delegated to ``super().apply_chat_template``.

    Args:
        conversation: A single conversation (list of message dicts) or a
            batch of conversations (list of lists of message dicts).
        chat_template: Optional template string overriding the default.
        **kwargs: Forwarded verbatim to the parent implementation.

    Returns:
        The rendered chat template string from the parent implementation.
    """
    default_system_prompt = (
        "You are Qwen, a virtual human developed by the Qwen Team, Alibaba Group, "
        "capable of perceiving auditory and visual inputs, as well as generating text and speech."
    )
    if conversation:
        # A single conversation is a list of Mappings; a batch is a list of
        # such lists. Normalize to "first message of each conversation".
        first_msgs = (
            [conversation[0]]
            if isinstance(conversation[0], Mapping)
            else [conv[0] for conv in conversation if conv]
        )
        for first_msg in first_msgs:
            # Use .get so malformed messages (missing "role"/"content") are
            # left for the parent implementation to report, instead of
            # raising KeyError here.
            if first_msg.get("role") != "system":
                continue
            content = first_msg.get("content")
            system_prompt: Optional[str] = None
            if isinstance(content, str):
                system_prompt = content
            elif isinstance(content, list):
                # Multimodal content: compare against the first text part, if any.
                for part in content:
                    if isinstance(part, dict) and "text" in part:
                        system_prompt = part["text"]
                        break
            if system_prompt is not None and system_prompt != default_system_prompt:
                # Named logger (not the root logger) so downstream users can
                # filter/configure this warning per stdlib logging convention.
                logging.getLogger(__name__).warning(
                    "The system prompt has been modified, which may reduce model performance. "
                    "Prefer using the default system prompt by omitting the system role from the input."
                )

    return super().apply_chat_template(conversation, chat_template, **kwargs)
|
| 199 |
+
|
| 200 |
def __call__(
|
| 201 |
self,
|
| 202 |
text: Optional[List[str]] = None,
|