---
# Model card front matter: declared metrics plus the model-index block that
# records per-benchmark evaluation results for StarCoder.
#
# Every result below is a text-generation task scored with pass@1; values are
# written as fractions between 0 and 1, and none are marked as verified.

# NOTE(review): the top-level metrics list names bertscore and accuracy, while
# every result in model-index reports pass@1 -- confirm this list is intended.
metrics:
  - bertscore
  - accuracy

model-index:
  - name: StarCoder
    results:
      # --- Python benchmarks -------------------------------------------------
      - task:
          type: text-generation
        dataset:
          type: openai_humaneval
          name: HumanEval (Prompted)
        metrics:
          - name: pass@1
            type: pass@1
            value: 0.408
            verified: false
      - task:
          type: text-generation
        dataset:
          type: openai_humaneval
          name: HumanEval
        metrics:
          - name: pass@1
            type: pass@1
            value: 0.336
            verified: false
      - task:
          type: text-generation
        dataset:
          type: mbpp
          name: MBPP
        metrics:
          - name: pass@1
            type: pass@1
            value: 0.527
            verified: false
      - task:
          type: text-generation
        dataset:
          type: ds1000
          name: DS-1000 (Overall Completion)
        metrics:
          - name: pass@1
            type: pass@1
            value: 0.26
            verified: false

      # --- MultiPL-E (HumanEval translated per language) ---------------------
      - task:
          type: text-generation
        dataset:
          type: nuprl/MultiPL-E
          name: MultiPL-HumanEval (C++)
        metrics:
          - name: pass@1
            type: pass@1
            value: 0.3155
            verified: false
      - task:
          type: text-generation
        dataset:
          type: nuprl/MultiPL-E
          # Quoted so the '#' can never be read as the start of a comment.
          name: 'MultiPL-HumanEval (C#)'
        metrics:
          - name: pass@1
            type: pass@1
            value: 0.2101
            verified: false
      - task:
          type: text-generation
        dataset:
          type: nuprl/MultiPL-E
          name: MultiPL-HumanEval (D)
        metrics:
          - name: pass@1
            type: pass@1
            value: 0.1357
            verified: false
      - task:
          type: text-generation
        dataset:
          type: nuprl/MultiPL-E
          name: MultiPL-HumanEval (Go)
        metrics:
          - name: pass@1
            type: pass@1
            value: 0.1761
            verified: false
      - task:
          type: text-generation
        dataset:
          type: nuprl/MultiPL-E
          name: MultiPL-HumanEval (Java)
        metrics:
          - name: pass@1
            type: pass@1
            value: 0.3022
            verified: false
      - task:
          type: text-generation
        dataset:
          type: nuprl/MultiPL-E
          name: MultiPL-HumanEval (Julia)
        metrics:
          - name: pass@1
            type: pass@1
            value: 0.2302
            verified: false
      - task:
          type: text-generation
        dataset:
          type: nuprl/MultiPL-E
          name: MultiPL-HumanEval (JavaScript)
        metrics:
          - name: pass@1
            type: pass@1
            value: 0.3079
            verified: false
      - task:
          type: text-generation
        dataset:
          type: nuprl/MultiPL-E
          name: MultiPL-HumanEval (Lua)
        metrics:
          - name: pass@1
            type: pass@1
            value: 0.2389
            verified: false
      - task:
          type: text-generation
        dataset:
          type: nuprl/MultiPL-E
          name: MultiPL-HumanEval (PHP)
        metrics:
          - name: pass@1
            type: pass@1
            value: 0.2608
            verified: false
      - task:
          type: text-generation
        dataset:
          type: nuprl/MultiPL-E
          name: MultiPL-HumanEval (Perl)
        metrics:
          - name: pass@1
            type: pass@1
            value: 0.1734
            verified: false
      - task:
          type: text-generation
        dataset:
          type: nuprl/MultiPL-E
          name: MultiPL-HumanEval (Python)
        metrics:
          - name: pass@1
            type: pass@1
            value: 0.3357
            verified: false
      - task:
          type: text-generation
        dataset:
          type: nuprl/MultiPL-E
          name: MultiPL-HumanEval (R)
        metrics:
          - name: pass@1
            type: pass@1
            value: 0.155
            verified: false
      - task:
          type: text-generation
        dataset:
          type: nuprl/MultiPL-E
          name: MultiPL-HumanEval (Ruby)
        metrics:
          - name: pass@1
            type: pass@1
            value: 0.0124
            verified: false
      - task:
          type: text-generation
        dataset:
          type: nuprl/MultiPL-E
          name: MultiPL-HumanEval (Racket)
        metrics:
          - name: pass@1
            type: pass@1
            value: 0.0007
            verified: false
      - task:
          type: text-generation
        dataset:
          type: nuprl/MultiPL-E
          name: MultiPL-HumanEval (Rust)
        metrics:
          - name: pass@1
            type: pass@1
            value: 0.2184
            verified: false
      - task:
          type: text-generation
        dataset:
          type: nuprl/MultiPL-E
          name: MultiPL-HumanEval (Scala)
        metrics:
          - name: pass@1
            type: pass@1
            value: 0.2761
            verified: false
      - task:
          type: text-generation
        dataset:
          type: nuprl/MultiPL-E
          name: MultiPL-HumanEval (Bash)
        metrics:
          - name: pass@1
            type: pass@1
            value: 0.1046
            verified: false
      - task:
          type: text-generation
        dataset:
          type: nuprl/MultiPL-E
          name: MultiPL-HumanEval (Swift)
        metrics:
          - name: pass@1
            type: pass@1
            value: 0.2274
            verified: false
      - task:
          type: text-generation
        dataset:
          type: nuprl/MultiPL-E
          name: MultiPL-HumanEval (TypeScript)
        metrics:
          - name: pass@1
            type: pass@1
            value: 0.3229
            verified: false
---