Word2Li committed on
Commit
f9c8402
·
verified ·
1 Parent(s): 7c02997

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +16 -16
README.md CHANGED
@@ -18,8 +18,8 @@ model-index:
18
  name: MMLU
19
  type: MMLU
20
  metrics:
21
- - name: Weighted Avg.
22
- type: Weighted Avg.
23
  value: 38.73
24
  verified: true
25
  - task:
@@ -28,8 +28,8 @@ model-index:
28
  name: IFEval
29
  type: IFEval
30
  metrics:
31
- - name: Avg.
32
- type: Avg.
33
  value: 44.01
34
  verified: true
35
  - task:
@@ -38,8 +38,8 @@ model-index:
38
  name: GSM8K
39
  type: GSM8K
40
  metrics:
41
- - name: pass@1
42
- type: pass@1
43
  value: 34.80
44
  verified: true
45
  - task:
@@ -48,8 +48,8 @@ model-index:
48
  name: MATH
49
  type: MATH
50
  metrics:
51
- - name: pass@1
52
- type: pass@1
53
  value: 6.64
54
  verified: true
55
  - task:
@@ -58,8 +58,8 @@ model-index:
58
  name: HumanEval
59
  type: HumanEval
60
  metrics:
61
- - name: pass@1
62
- type: pass@1
63
  value: 26.22
64
  verified: true
65
  - task:
@@ -68,8 +68,8 @@ model-index:
68
  name: MBPP
69
  type: MBPP
70
  metrics:
71
- - name: pass@1
72
- type: pass@1
73
  value: 31.40
74
  verified: true
75
  - task:
@@ -78,8 +78,8 @@ model-index:
78
  name: Hellaswag
79
  type: Hellaswag
80
  metrics:
81
- - name: pass@1
82
- type: pass@1
83
  value: 44.86
84
  verified: true
85
  - task:
@@ -88,8 +88,8 @@ model-index:
88
  name: GPQA
89
  type: GPQA
90
  metrics:
91
- - name: pass@1
92
- type: pass@1
93
  value: 11.11
94
  verified: true
95
  metrics:
 
18
  name: MMLU
19
  type: MMLU
20
  metrics:
21
+ - name: weighted accuracy
22
+ type: weighted accuracy
23
  value: 38.73
24
  verified: true
25
  - task:
 
28
  name: IFEval
29
  type: IFEval
30
  metrics:
31
+ - name: overall accuracy
32
+ type: overall accuracy
33
  value: 44.01
34
  verified: true
35
  - task:
 
38
  name: GSM8K
39
  type: GSM8K
40
  metrics:
41
+ - name: accuracy
42
+ type: accuracy
43
  value: 34.80
44
  verified: true
45
  - task:
 
48
  name: MATH
49
  type: MATH
50
  metrics:
51
+ - name: accuracy
52
+ type: accuracy
53
  value: 6.64
54
  verified: true
55
  - task:
 
58
  name: HumanEval
59
  type: HumanEval
60
  metrics:
61
+ - name: humaneval_pass@1
62
+ type: humaneval_pass@1
63
  value: 26.22
64
  verified: true
65
  - task:
 
68
  name: MBPP
69
  type: MBPP
70
  metrics:
71
+ - name: score
72
+ type: score
73
  value: 31.40
74
  verified: true
75
  - task:
 
78
  name: Hellaswag
79
  type: Hellaswag
80
  metrics:
81
+ - name: accuracy
82
+ type: accuracy
83
  value: 44.86
84
  verified: true
85
  - task:
 
88
  name: GPQA
89
  type: GPQA
90
  metrics:
91
+ - name: accuracy
92
+ type: accuracy
93
  value: 11.11
94
  verified: true
95
  metrics: