dustalov committed on
Commit
88e5897
·
verified ·
1 Parent(s): b47cb9c

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +44 -14
README.md CHANGED
@@ -11,6 +11,20 @@ tags:
11
  model-index:
12
  - name: Mellum-4b-base
13
  results:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  - task:
15
  type: text-generation
16
  dataset:
@@ -64,16 +78,16 @@ model-index:
64
  - task:
65
  type: text-generation
66
  dataset:
67
- type: tianyang/repobench_python_v1.1
68
- name: RepoBench 1.1 (Python)
69
  metrics:
70
  - name: EM
71
  type: exact_match
72
- value: 0.2591
73
  verified: false
74
  - name: EM ≤ 8k
75
  type: exact_match
76
- value: 0.2797
77
  verified: false
78
  - task:
79
  type: text-generation
@@ -128,26 +142,42 @@ model-index:
128
  - task:
129
  type: text-generation
130
  dataset:
131
- type: tianyang/repobench_java_v1.1
132
- name: RepoBench 1.1 (Java)
133
  metrics:
134
- - name: EM
135
- type: exact_match
136
- value: 0.2858
137
  verified: false
138
- - name: EM ≤ 8k
139
- type: exact_match
140
- value: 0.3108
 
 
 
 
 
 
141
  verified: false
142
  - task:
143
  type: text-generation
144
  dataset:
145
  type: gonglinyuan/safim
146
- name: SAFIM
147
  metrics:
148
  - name: pass@1
149
  type: pass@1
150
- value: 0.3811
 
 
 
 
 
 
 
 
 
 
151
  verified: false
152
  - task:
153
  type: text-generation
 
11
  model-index:
12
  - name: Mellum-4b-base
13
  results:
14
+ - task:
15
+ type: text-generation
16
+ dataset:
17
+ type: tianyang/repobench_python_v1.1
18
+ name: RepoBench 1.1 (Python)
19
+ metrics:
20
+ - name: EM
21
+ type: exact_match
22
+ value: 0.2591
23
+ verified: false
24
+ - name: EM ≤ 8k
25
+ type: exact_match
26
+ value: 0.2797
27
+ verified: false
28
  - task:
29
  type: text-generation
30
  dataset:
 
78
  - task:
79
  type: text-generation
80
  dataset:
81
+ type: tianyang/repobench_java_v1.1
82
+ name: RepoBench 1.1 (Java)
83
  metrics:
84
  - name: EM
85
  type: exact_match
86
+ value: 0.2858
87
  verified: false
88
  - name: EM ≤ 8k
89
  type: exact_match
90
+ value: 0.3108
91
  verified: false
92
  - task:
93
  type: text-generation
 
142
  - task:
143
  type: text-generation
144
  dataset:
145
+ type: gonglinyuan/safim
146
+ name: SAFIM
147
  metrics:
148
+ - name: pass@1
149
+ type: pass@1
150
+ value: 0.3811
151
  verified: false
152
+ - task:
153
+ type: text-generation
154
+ dataset:
155
+ type: gonglinyuan/safim
156
+ name: SAFIM (Algorithmic)
157
+ metrics:
158
+ - name: pass@1
159
+ type: pass@1
160
+ value: 0.2530
161
  verified: false
162
  - task:
163
  type: text-generation
164
  dataset:
165
  type: gonglinyuan/safim
166
+ name: SAFIM (Control)
167
  metrics:
168
  - name: pass@1
169
  type: pass@1
170
+ value: 0.3839
171
+ verified: false
172
+ - task:
173
+ type: text-generation
174
+ dataset:
175
+ type: gonglinyuan/safim
176
+ name: SAFIM (API)
177
+ metrics:
178
+ - name: pass@1
179
+ type: pass@1
180
+ value: 0.5065
181
  verified: false
182
  - task:
183
  type: text-generation