---
metrics:
- bertscore
- accuracy
model-index:
- name: StarCoder
  results:
  - task:
      type: text-generation
    dataset:
      type: openai_humaneval
      name: HumanEval (Prompted)
    metrics:
    - name: pass@1
      type: pass@1
      value: 0.408
      verified: false
  - task:
      type: text-generation
    dataset:
      type: openai_humaneval
      name: HumanEval
    metrics:
    - name: pass@1
      type: pass@1
      value: 0.336
      verified: false
  - task:
      type: text-generation
    dataset:
      type: mbpp
      name: MBPP
    metrics:
    - name: pass@1
      type: pass@1
      value: 0.527
      verified: false
  - task:
      type: text-generation
    dataset:
      type: ds1000
      name: DS-1000 (Overall Completion)
    metrics:
    - name: pass@1
      type: pass@1
      value: 0.26
      verified: false
  - task:
      type: text-generation
    dataset:
      type: nuprl/MultiPL-E
      name: MultiPL-HumanEval (C++)
    metrics:
    - name: pass@1
      type: pass@1
      value: 0.3155
      verified: false
  - task:
      type: text-generation
    dataset:
      type: nuprl/MultiPL-E
      name: MultiPL-HumanEval (C#)
    metrics:
    - name: pass@1
      type: pass@1
      value: 0.2101
      verified: false
  - task:
      type: text-generation
    dataset:
      type: nuprl/MultiPL-E
      name: MultiPL-HumanEval (D)
    metrics:
    - name: pass@1
      type: pass@1
      value: 0.1357
      verified: false
  - task:
      type: text-generation
    dataset:
      type: nuprl/MultiPL-E
      name: MultiPL-HumanEval (Go)
    metrics:
    - name: pass@1
      type: pass@1
      value: 0.1761
      verified: false
  - task:
      type: text-generation
    dataset:
      type: nuprl/MultiPL-E
      name: MultiPL-HumanEval (Java)
    metrics:
    - name: pass@1
      type: pass@1
      value: 0.3022
      verified: false
  - task:
      type: text-generation
    dataset:
      type: nuprl/MultiPL-E
      name: MultiPL-HumanEval (Julia)
    metrics:
    - name: pass@1
      type: pass@1
      value: 0.2302
      verified: false
  - task:
      type: text-generation
    dataset:
      type: nuprl/MultiPL-E
      name: MultiPL-HumanEval (JavaScript)
    metrics:
    - name: pass@1
      type: pass@1
      value: 0.3079
      verified: false
  - task:
      type: text-generation
    dataset:
      type: nuprl/MultiPL-E
      name: MultiPL-HumanEval (Lua)
    metrics:
    - name: pass@1
      type: pass@1
      value: 0.2389
      verified: false
  - task:
      type: text-generation
    dataset:
      type: nuprl/MultiPL-E
      name: MultiPL-HumanEval (PHP)
    metrics:
    - name: pass@1
      type: pass@1
      value: 0.2608
      verified: false
  - task:
      type: text-generation
    dataset:
      type: nuprl/MultiPL-E
      name: MultiPL-HumanEval (Perl)
    metrics:
    - name: pass@1
      type: pass@1
      value: 0.1734
      verified: false
  - task:
      type: text-generation
    dataset:
      type: nuprl/MultiPL-E
      name: MultiPL-HumanEval (Python)
    metrics:
    - name: pass@1
      type: pass@1
      value: 0.3357
      verified: false
  - task:
      type: text-generation
    dataset:
      type: nuprl/MultiPL-E
      name: MultiPL-HumanEval (R)
    metrics:
    - name: pass@1
      type: pass@1
      value: 0.155
      verified: false
  - task:
      type: text-generation
    dataset:
      type: nuprl/MultiPL-E
      name: MultiPL-HumanEval (Ruby)
    metrics:
    - name: pass@1
      type: pass@1
      value: 0.0124
      verified: false
  - task:
      type: text-generation
    dataset:
      type: nuprl/MultiPL-E
      name: MultiPL-HumanEval (Racket)
    metrics:
    - name: pass@1
      type: pass@1
      value: 0.0007
      verified: false
  - task:
      type: text-generation
    dataset:
      type: nuprl/MultiPL-E
      name: MultiPL-HumanEval (Rust)
    metrics:
    - name: pass@1
      type: pass@1
      value: 0.2184
      verified: false
  - task:
      type: text-generation
    dataset:
      type: nuprl/MultiPL-E
      name: MultiPL-HumanEval (Scala)
    metrics:
    - name: pass@1
      type: pass@1
      value: 0.2761
      verified: false
  - task:
      type: text-generation
    dataset:
      type: nuprl/MultiPL-E
      name: MultiPL-HumanEval (Bash)
    metrics:
    - name: pass@1
      type: pass@1
      value: 0.1046
      verified: false
  - task:
      type: text-generation
    dataset:
      type: nuprl/MultiPL-E
      name: MultiPL-HumanEval (Swift)
    metrics:
    - name: pass@1
      type: pass@1
      value: 0.2274
      verified: false
  - task:
      type: text-generation
    dataset:
      type: nuprl/MultiPL-E
      name: MultiPL-HumanEval (TypeScript)
    metrics:
    - name: pass@1
      type: pass@1
      value: 0.3229
      verified: false
---