Upload MM_LLMs
Files changed:
- config.json +2 -1
- model.safetensors +1 -1
- modeling_audio.py +5 -1
config.json
CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "audio-alignment-tinyllama/checkpoint-
+  "_name_or_path": "audio-alignment-tinyllama/checkpoint-6800",
   "architectures": [
     "MM_LLMs"
   ],
@@ -203,6 +203,7 @@
     "use_weighted_layer_sum": false,
     "vocab_size": 51865
   },
+  "audio_select_layer": -2,
   "auto_map": {
     "AutoConfig": "modeling_audio.MM_LLMs_Config",
     "AutoModel": "modeling_audio.MM_LLMs"
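The new top-level `audio_select_layer` key sits next to `auto_map`, so it becomes an attribute of the loaded config object. A minimal sketch of reading it back (the repo id below is a placeholder; `trust_remote_code=True` is needed because `auto_map` resolves to the custom `modeling_audio` module shipped in the repo):

```python
from transformers import AutoConfig

# Placeholder repo id; substitute the actual Hub repo for this model.
# trust_remote_code=True lets AutoConfig load the custom class named in
# auto_map ("modeling_audio.MM_LLMs_Config") from the repo itself.
config = AutoConfig.from_pretrained(
    "your-namespace/audio-alignment-tinyllama",
    trust_remote_code=True,
)
print(config.audio_select_layer)  # -2, the value added in this commit
```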
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d69f1d7e7d57b232dead293d2c9ee96b39daf7783fcb15ba200db80b23b2c80f
 size 2817909376
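Only this Git LFS pointer lives in the repo; the `oid` is the SHA-256 digest of the actual ~2.8 GB weights file. A small sketch for checking a downloaded `model.safetensors` against that digest (the local path is an assumption for illustration):

```python
import hashlib

EXPECTED = "d69f1d7e7d57b232dead293d2c9ee96b39daf7783fcb15ba200db80b23b2c80f"

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Hash the file in 1 MiB chunks so the 2.8 GB weights never sit in memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Illustrative path; point this at wherever the file was downloaded.
assert sha256_of("model.safetensors") == EXPECTED
```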
modeling_audio.py
CHANGED
@@ -34,11 +34,13 @@ class MM_LLMs_Config(PretrainedConfig):
         self,
         audio_config=None,
         llm_config=None,
+        audio_select_layer=-2,
         **kwargs
     ):
 
         self.audio_config = audio_config
         self.llm_config = llm_config
+        self.audio_select_layer = audio_select_layer
 
         if isinstance(self.audio_config, dict):
             audio_config["model_type"] = (
@@ -252,6 +254,8 @@ class MM_LLMs(PreTrainedModel):
         return model_inputs
 
     def encode_audio(self, audios):
-
+
+        encoded = self.audio_encoder.encoder(audios, output_hidden_states=True)
+        encoded = encoded.hidden_states[self.config.audio_select_layer]
         audio_features = self.audio_projector(encoded.transpose(1, 2).contiguous())
         return audio_features
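The substantive change is in `encode_audio`: the encoder is now run with `output_hidden_states=True`, and the layer indexed by `config.audio_select_layer` (default `-2`, the penultimate layer) is what gets fed to the projector, rather than the encoder's final output. A standalone sketch of that selection, assuming the audio tower is a Whisper encoder (the `vocab_size` of 51865 in config.json matches Whisper, but the diff itself only shows that `self.audio_encoder` exposes an `.encoder`, so the checkpoint below is an assumption):

```python
import torch
from transformers import WhisperModel

# Assumed audio tower for illustration only.
whisper = WhisperModel.from_pretrained("openai/whisper-small")
audio_select_layer = -2  # default added in this commit

# Whisper encoders take log-mel features of shape (batch, n_mels, n_frames).
features = torch.randn(1, 80, 3000)

with torch.no_grad():
    encoded = whisper.encoder(features, output_hidden_states=True)

# hidden_states = (embedding output, layer 1, ..., layer N); index -2
# picks the output of the second-to-last encoder block instead of the
# final, most task-specialized one.
selected = encoded.hidden_states[audio_select_layer]
print(selected.shape)  # e.g. torch.Size([1, 1500, 768]) for whisper-small
```

Selecting `-2` mirrors common practice in vision-language models (LLaVA, for instance, defaults to a penultimate CLIP layer), on the intuition that the last layer is overfit to the encoder's original training objective while intermediate layers carry more general features for the projector to align.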