nehcgs committed on
Commit
720201a
·
verified ·
1 Parent(s): 6b4da3b

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +30 -32
  2. config.json +2 -2
  3. model.safetensors +1 -1
  4. tokenizer_config.json +2 -2
README.md CHANGED
@@ -4,7 +4,7 @@ license_name: katanemo-research
4
  license_link: >-
5
  https://huggingface.co/katanemolabs/Arch-Function-1.5B/blob/main/LICENSE
6
  base_model:
7
- - Qwen/Qwen2.5-1.5B-Instruct
8
  language:
9
  - en
10
  pipeline_tag: text-generation
@@ -84,6 +84,17 @@ We evaluate Katanemo Arch-Function series on the [Berkeley Function-Calling Lead
84
  <td>63.41%</td>
85
  <td>82.93%</td>
86
  </tr>
 
 
 
 
 
 
 
 
 
 
 
87
  <tr style="text-align: center; vertical-align: middle;">
88
  <td>6</td>
89
  <td>o1-preview-2024-09-12 (Prompt)</td>
@@ -95,17 +106,6 @@ We evaluate Katanemo Arch-Function series on the [Berkeley Function-Calling Lead
95
  <td>73.17%</td>
96
  <td>74.60%</td>
97
  </tr>
98
- <tr style="text-align: center; vertical-align: middle; font-weight: bold;">
99
- <td> </td>
100
- <td>Arch-Function-7B</td>
101
- <td>58.44%</td>
102
- <td>85.58%</td>
103
- <td>88.14%</td>
104
- <td>69.08%</td>
105
- <td>20.50%</td>
106
- <td>92.68%</td>
107
- <td>74.05%</td>
108
- </tr>
109
  <tr style="text-align: center; vertical-align: middle; ">
110
  <td>9</td>
111
  <td>Gemini-1.5-Flash-002 (Prompt)</td>
@@ -117,6 +117,17 @@ We evaluate Katanemo Arch-Function series on the [Berkeley Function-Calling Lead
117
  <td>85.37%</td>
118
  <td>78.54%</td>
119
  </tr>
 
 
 
 
 
 
 
 
 
 
 
120
  <tr style="text-align: center; vertical-align: middle; ">
121
  <td>12</td>
122
  <td>Claude-3.5-Sonnet-20240620 (FC)</td>
@@ -139,30 +150,17 @@ We evaluate Katanemo Arch-Function series on the [Berkeley Function-Calling Lead
139
  <td>75.61%</td>
140
  <td>49.44%</td>
141
  </tr>
142
- <tr style="text-align: center; vertical-align: middle; font-weight: bold;">
143
- <td> </td>
144
- <td>Arch-Function-3B</td>
145
- <td>56.57%</td>
146
- <td>83.62%</td>
147
- <td>85.36%</td>
148
- <td>66.90%</td>
149
- <td>19.50%</td>
150
- <td>97.56%</td>
151
- <td>70.99%</td>
152
- </tr>
153
- </tr>
154
  <tr style="text-align: center; vertical-align: middle; font-weight: bold;">
155
  <td> </td>
156
  <td>Arch-Function-1.5B</td>
157
- <td>54.52%</td>
158
- <td>80.31%</td>
159
- <td>82.04%</td>
160
- <td>66.19%</td>
161
- <td>17.25%</td>
162
- <td>97.56%</td>
163
- <td>69.95%</td>
164
  </tr>
165
-
166
  <tr style="text-align: center; vertical-align: middle; ">
167
  <td>21</td>
168
  <td>Llama-3.1-70B-Instruct (Prompt)</td>
 
4
  license_link: >-
5
  https://huggingface.co/katanemolabs/Arch-Function-1.5B/blob/main/LICENSE
6
  base_model:
7
+ - Qwen/Qwen2.5-Coder-1.5B-Instruct
8
  language:
9
  - en
10
  pipeline_tag: text-generation
 
84
  <td>63.41%</td>
85
  <td>82.93%</td>
86
  </tr>
87
+ <tr style="text-align: center; vertical-align: middle; font-weight: bold;">
88
+ <td> </td>
89
+ <td>Arch-Function-7B</td>
90
+ <td>59.62%</td>
91
+ <td>86.83%</td>
92
+ <td>88.07%</td>
93
+ <td>71.57%</td>
94
+ <td>21.00%</td>
95
+ <td>95.12%</td>
96
+ <td>73.63%</td>
97
+ </tr>
98
  <tr style="text-align: center; vertical-align: middle;">
99
  <td>6</td>
100
  <td>o1-preview-2024-09-12 (Prompt)</td>
 
106
  <td>73.17%</td>
107
  <td>74.60%</td>
108
  </tr>
 
 
 
 
 
 
 
 
 
 
 
109
  <tr style="text-align: center; vertical-align: middle; ">
110
  <td>9</td>
111
  <td>Gemini-1.5-Flash-002 (Prompt)</td>
 
117
  <td>85.37%</td>
118
  <td>78.54%</td>
119
  </tr>
120
+ <tr style="text-align: center; vertical-align: middle; font-weight: bold;">
121
+ <td> </td>
122
+ <td>Arch-Function-3B</td>
123
+ <td>57.69%</td>
124
+ <td>85.19%</td>
125
+ <td>86.18%</td>
126
+ <td>71.21%</td>
127
+ <td>17.50%</td>
128
+ <td>90.24%</td>
129
+ <td>72.88%</td>
130
+ </tr>
131
  <tr style="text-align: center; vertical-align: middle; ">
132
  <td>12</td>
133
  <td>Claude-3.5-Sonnet-20240620 (FC)</td>
 
150
  <td>75.61%</td>
151
  <td>49.44%</td>
152
  </tr>
 
 
 
 
 
 
 
 
 
 
 
 
153
  <tr style="text-align: center; vertical-align: middle; font-weight: bold;">
154
  <td> </td>
155
  <td>Arch-Function-1.5B</td>
156
+ <td>56.20%</td>
157
+ <td>84.40%</td>
158
+ <td>83.96%</td>
159
+ <td>69.36%</td>
160
+ <td>15.88%</td>
161
+ <td>87.80%</td>
162
+ <td>74.39%</td>
163
  </tr>
 
164
  <tr style="text-align: center; vertical-align: middle; ">
165
  <td>21</td>
166
  <td>Llama-3.1-70B-Instruct (Prompt)</td>
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "Qwen/Qwen2.5-1.5B-Instruct",
3
  "architectures": [
4
  "Qwen2ForCausalLM"
5
  ],
@@ -11,7 +11,7 @@
11
  "initializer_range": 0.02,
12
  "intermediate_size": 8960,
13
  "max_position_embeddings": 32768,
14
- "max_window_layers": 21,
15
  "model_type": "qwen2",
16
  "num_attention_heads": 12,
17
  "num_hidden_layers": 28,
 
1
  {
2
+ "_name_or_path": "Qwen/Qwen2.5-Coder-1.5B-Instruct",
3
  "architectures": [
4
  "Qwen2ForCausalLM"
5
  ],
 
11
  "initializer_range": 0.02,
12
  "intermediate_size": 8960,
13
  "max_position_embeddings": 32768,
14
+ "max_window_layers": 28,
15
  "model_type": "qwen2",
16
  "num_attention_heads": 12,
17
  "num_hidden_layers": 28,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:277363bbda7db5c37e1f533b74b621a3ff362764486b8001ffdf868e6e36364e
3
  size 3087467144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3440dc10971cff500578021d8f87b978786b39379963fb4056bab0fb8d085630
3
  size 3087467144
tokenizer_config.json CHANGED
@@ -199,9 +199,9 @@
199
  "clean_up_tokenization_spaces": false,
200
  "eos_token": "<|im_end|>",
201
  "errors": "replace",
202
- "model_max_length": 131072,
203
  "pad_token": "<|endoftext|>",
204
  "split_special_tokens": false,
205
  "tokenizer_class": "Qwen2Tokenizer",
206
  "unk_token": null
207
- }
 
199
  "clean_up_tokenization_spaces": false,
200
  "eos_token": "<|im_end|>",
201
  "errors": "replace",
202
+ "model_max_length": 32768,
203
  "pad_token": "<|endoftext|>",
204
  "split_special_tokens": false,
205
  "tokenizer_class": "Qwen2Tokenizer",
206
  "unk_token": null
207
+ }