chunhuizng committed on
Commit
6da6ccb
·
verified ·
1 Parent(s): 5e81b4b

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +39 -83
config.json CHANGED
@@ -1,103 +1,59 @@
1
  {
 
 
2
  "auto_map": {
3
  "AutoModelForCausalLM": "modeling_audio_only_thinker.AudioOnlyThinker"
4
  },
5
- "architectures": [
6
- "AudioOnlyThinker"
7
- ],
8
- "audio_config": {
9
- "_attn_implementation_autoset": true,
10
- "activation_dropout": 0.0,
11
- "activation_function": "gelu",
12
- "attention_dropout": 0.0,
13
- "d_model": 1280,
14
- "dropout": 0.0,
15
- "encoder_attention_heads": 20,
16
- "encoder_ffn_dim": 5120,
17
- "encoder_layerdrop": 0.0,
18
- "encoder_layers": 32,
19
- "init_std": 0.02,
20
- "max_source_positions": 1500,
21
- "model_type": "qwen2_5_omni_audio_encoder",
22
- "n_window": 100,
23
- "num_hidden_layers": 32,
24
- "num_mel_bins": 128,
25
- "output_dim": 3584,
26
- "scale_embedding": false
27
- },
28
- "audio_end_token_id": 151648,
29
- "audio_start_token_id": 151647,
30
  "audio_token_index": 151646,
 
 
31
  "bos_token_id": 151644,
32
  "eos_token_id": 151645,
33
- "ignore_index": -100,
34
- "image_token_index": 151655,
35
- "init_std": 0.02,
36
- "model_type": "qwen2_5_omni_thinker",
37
  "pad_token_id": 151643,
38
- "position_id_per_seconds": 25,
39
- "seconds_per_chunk": 2,
 
40
  "text_config": {
41
- "attention_dropout": 0.0,
42
- "hidden_act": "silu",
43
  "hidden_size": 3584,
44
- "init_std": 0.02,
45
  "intermediate_size": 18944,
46
- "max_position_embeddings": 32768,
47
- "max_window_layers": 28,
48
- "model_type": "qwen2_5_omni_text",
49
  "num_attention_heads": 28,
50
  "num_hidden_layers": 28,
51
  "num_key_value_heads": 4,
52
- "rms_norm_eps": 1e-06,
 
 
 
53
  "rope_scaling": {
54
- "mrope_section": [
55
- 16,
56
- 24,
57
- 24
58
- ],
59
- "rope_type": "default",
60
- "type": "default"
61
  },
62
- "rope_theta": 1000000.0,
63
- "sliding_window": 32768,
64
- "tie_word_embeddings": false,
65
  "use_cache": true,
66
  "use_sliding_window": false,
67
- "vocab_size": 152064
68
- },
69
- "tie_word_embeddings": false,
70
- "torch_dtype": "bfloat16",
71
- "transformers_version": "4.50.0.dev0",
72
- "user_token_id": 872,
73
- "video_token_index": 151656,
74
- "vision_config": {
75
- "_attn_implementation_autoset": true,
76
- "depth": 32,
77
- "embed_dim": 1280,
78
- "fullatt_block_indexes": [
79
- 7,
80
- 15,
81
- 23,
82
- 31
83
- ],
84
- "hidden_act": "silu",
85
- "hidden_size": 1280,
86
- "in_channels": 3,
87
- "in_chans": 3,
88
  "init_std": 0.02,
89
- "intermediate_size": 3420,
90
- "model_type": "qwen2_5_omni_vision_encoder",
91
- "num_heads": 16,
92
- "out_hidden_size": 3584,
93
- "patch_size": 14,
94
- "spatial_merge_size": 2,
95
- "spatial_patch_size": 14,
96
- "temporal_patch_size": 2,
97
- "tokens_per_second": 25,
98
- "window_size": 112
99
  },
100
- "vision_end_token_id": 151653,
101
- "vision_start_token_id": 151652,
102
- "vision_token_id": 151654
103
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  {
2
+ "model_type": "audio-only-thinker",
3
+ "architectures": ["AudioOnlyThinker"],
4
  "auto_map": {
5
  "AutoModelForCausalLM": "modeling_audio_only_thinker.AudioOnlyThinker"
6
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  "audio_token_index": 151646,
8
+ "audio_start_token_id": 151647,
9
+ "audio_end_token_id": 151648,
10
  "bos_token_id": 151644,
11
  "eos_token_id": 151645,
 
 
 
 
12
  "pad_token_id": 151643,
13
+ "tie_word_embeddings": false,
14
+ "torch_dtype": "bfloat16",
15
+ "transformers_version": "4.50.0.dev0",
16
  "text_config": {
17
+ "model_type": "qwen2_5_omni_text",
18
+ "vocab_size": 152064,
19
  "hidden_size": 3584,
 
20
  "intermediate_size": 18944,
 
 
 
21
  "num_attention_heads": 28,
22
  "num_hidden_layers": 28,
23
  "num_key_value_heads": 4,
24
+ "max_position_embeddings": 32768,
25
+ "sliding_window": 32768,
26
+ "max_window_layers": 28,
27
+ "rope_theta": 1000000.0,
28
  "rope_scaling": {
29
+ "mrope_section": [16, 24, 24],
30
+ "type": "default",
31
+ "rope_type": "default"
 
 
 
 
32
  },
 
 
 
33
  "use_cache": true,
34
  "use_sliding_window": false,
35
+ "tie_word_embeddings": false,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  "init_std": 0.02,
37
+ "rms_norm_eps": 1e-06,
38
+ "hidden_act": "silu",
39
+ "attention_dropout": 0.0
 
 
 
 
 
 
 
40
  },
41
+ "audio_config": {
42
+ "model_type": "qwen2_5_omni_audio_encoder",
43
+ "d_model": 1280,
44
+ "num_hidden_layers": 32,
45
+ "encoder_attention_heads": 20,
46
+ "encoder_ffn_dim": 5120,
47
+ "dropout": 0.0,
48
+ "activation_function": "gelu",
49
+ "num_mel_bins": 128,
50
+ "output_dim": 3584,
51
+ "max_source_positions": 1500,
52
+ "n_window": 100,
53
+ "scale_embedding": false,
54
+ "init_std": 0.02,
55
+ "activation_dropout": 0.0,
56
+ "attention_dropout": 0.0,
57
+ "_attn_implementation_autoset": true
58
+ }
59
+ }