verify_01 / README.md
Namitoo's picture
Update README.md
7593f3e
|
raw
history blame
5.94 kB
---
metrics:
- bertscore
- accuracy
model-index:
- name: StarCoder
results:
- task:
type: text-generation
dataset:
type: openai_humaneval
name: HumanEval (Prompted)
metrics:
- name: pass@1
type: pass@1
value: 0.408
verified: false
- task:
type: text-generation
dataset:
type: openai_humaneval
name: HumanEval
metrics:
- name: pass@1
type: pass@1
value: 0.336
verified: false
- task:
type: text-generation
dataset:
type: mbpp
name: MBPP
metrics:
- name: pass@1
type: pass@1
value: 0.527
verified: false
- task:
type: text-generation
dataset:
type: ds1000
name: DS-1000 (Overall Completion)
metrics:
- name: pass@1
type: pass@1
value: 0.26
verified: false
- task:
type: text-generation
dataset:
type: nuprl/MultiPL-E
name: MultiPL-HumanEval (C++)
metrics:
- name: pass@1
type: pass@1
value: 0.3155
verified: false
- task:
type: text-generation
dataset:
type: nuprl/MultiPL-E
name: MultiPL-HumanEval (C#)
metrics:
- name: pass@1
type: pass@1
value: 0.2101
verified: false
- task:
type: text-generation
dataset:
type: nuprl/MultiPL-E
name: MultiPL-HumanEval (D)
metrics:
- name: pass@1
type: pass@1
value: 0.1357
verified: false
- task:
type: text-generation
dataset:
type: nuprl/MultiPL-E
name: MultiPL-HumanEval (Go)
metrics:
- name: pass@1
type: pass@1
value: 0.1761
verified: false
- task:
type: text-generation
dataset:
type: nuprl/MultiPL-E
name: MultiPL-HumanEval (Java)
metrics:
- name: pass@1
type: pass@1
value: 0.3022
verified: false
- task:
type: text-generation
dataset:
type: nuprl/MultiPL-E
name: MultiPL-HumanEval (Julia)
metrics:
- name: pass@1
type: pass@1
value: 0.2302
verified: false
- task:
type: text-generation
dataset:
type: nuprl/MultiPL-E
name: MultiPL-HumanEval (JavaScript)
metrics:
- name: pass@1
type: pass@1
value: 0.3079
verified: false
- task:
type: text-generation
dataset:
type: nuprl/MultiPL-E
name: MultiPL-HumanEval (Lua)
metrics:
- name: pass@1
type: pass@1
value: 0.2389
verified: false
- task:
type: text-generation
dataset:
type: nuprl/MultiPL-E
name: MultiPL-HumanEval (PHP)
metrics:
- name: pass@1
type: pass@1
value: 0.2608
verified: false
- task:
type: text-generation
dataset:
type: nuprl/MultiPL-E
name: MultiPL-HumanEval (Perl)
metrics:
- name: pass@1
type: pass@1
value: 0.1734
verified: false
- task:
type: text-generation
dataset:
type: nuprl/MultiPL-E
name: MultiPL-HumanEval (Python)
metrics:
- name: pass@1
type: pass@1
value: 0.3357
verified: false
- task:
type: text-generation
dataset:
type: nuprl/MultiPL-E
name: MultiPL-HumanEval (R)
metrics:
- name: pass@1
type: pass@1
value: 0.155
verified: false
- task:
type: text-generation
dataset:
type: nuprl/MultiPL-E
name: MultiPL-HumanEval (Ruby)
metrics:
- name: pass@1
type: pass@1
value: 0.0124
verified: false
- task:
type: text-generation
dataset:
type: nuprl/MultiPL-E
name: MultiPL-HumanEval (Racket)
metrics:
- name: pass@1
type: pass@1
value: 0.0007
verified: false
- task:
type: text-generation
dataset:
type: nuprl/MultiPL-E
name: MultiPL-HumanEval (Rust)
metrics:
- name: pass@1
type: pass@1
value: 0.2184
verified: false
- task:
type: text-generation
dataset:
type: nuprl/MultiPL-E
name: MultiPL-HumanEval (Scala)
metrics:
- name: pass@1
type: pass@1
value: 0.2761
verified: false
- task:
type: text-generation
dataset:
type: nuprl/MultiPL-E
name: MultiPL-HumanEval (Bash)
metrics:
- name: pass@1
type: pass@1
value: 0.1046
verified: false
- task:
type: text-generation
dataset:
type: nuprl/MultiPL-E
name: MultiPL-HumanEval (Swift)
metrics:
- name: pass@1
type: pass@1
value: 0.2274
verified: false
- task:
type: text-generation
dataset:
type: nuprl/MultiPL-E
name: MultiPL-HumanEval (TypeScript)
metrics:
- name: pass@1
type: pass@1
value: 0.3229
verified: false
---