Adding Evaluation Results (#4)
Browse files
- Adding Evaluation Results (842a85493025972dd3883e334c466859d2a8a8fc)
    	
        README.md
    CHANGED
    
    | @@ -13,7 +13,12 @@ tags: | |
| 13 | 
             
            - tool-use
         | 
| 14 | 
             
            base_model:
         | 
| 15 | 
             
            - Qwen/Qwen2.5-14B-Instruct
         | 
|  | |
|  | |
|  | |
|  | |
| 16 | 
             
            pipeline_tag: text-generation
         | 
|  | |
| 17 | 
             
            model-index:
         | 
| 18 | 
             
            - name: miscii-14b-1028
         | 
| 19 | 
             
              results:
         | 
| @@ -30,8 +35,7 @@ model-index: | |
| 30 | 
             
                  value: 82.37
         | 
| 31 | 
             
                  name: strict accuracy
         | 
| 32 | 
             
                source:
         | 
| 33 | 
            -
                  url:  | 
| 34 | 
            -
                    https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=sthenno-com/miscii-14b-1028
         | 
| 35 | 
             
                  name: Open LLM Leaderboard
         | 
| 36 | 
             
              - task:
         | 
| 37 | 
             
                  type: text-generation
         | 
| @@ -46,8 +50,7 @@ model-index: | |
| 46 | 
             
                  value: 49.26
         | 
| 47 | 
             
                  name: normalized accuracy
         | 
| 48 | 
             
                source:
         | 
| 49 | 
            -
                  url:  | 
| 50 | 
            -
                    https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=sthenno-com/miscii-14b-1028
         | 
| 51 | 
             
                  name: Open LLM Leaderboard
         | 
| 52 | 
             
              - task:
         | 
| 53 | 
             
                  type: text-generation
         | 
| @@ -62,8 +65,7 @@ model-index: | |
| 62 | 
             
                  value: 6.34
         | 
| 63 | 
             
                  name: exact match
         | 
| 64 | 
             
                source:
         | 
| 65 | 
            -
                  url:  | 
| 66 | 
            -
                    https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=sthenno-com/miscii-14b-1028
         | 
| 67 | 
             
                  name: Open LLM Leaderboard
         | 
| 68 | 
             
              - task:
         | 
| 69 | 
             
                  type: text-generation
         | 
| @@ -78,8 +80,7 @@ model-index: | |
| 78 | 
             
                  value: 14.21
         | 
| 79 | 
             
                  name: acc_norm
         | 
| 80 | 
             
                source:
         | 
| 81 | 
            -
                  url:  | 
| 82 | 
            -
                    https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=sthenno-com/miscii-14b-1028
         | 
| 83 | 
             
                  name: Open LLM Leaderboard
         | 
| 84 | 
             
              - task:
         | 
| 85 | 
             
                  type: text-generation
         | 
| @@ -94,8 +95,7 @@ model-index: | |
| 94 | 
             
                  value: 12
         | 
| 95 | 
             
                  name: acc_norm
         | 
| 96 | 
             
                source:
         | 
| 97 | 
            -
                  url:  | 
| 98 | 
            -
                    https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=sthenno-com/miscii-14b-1028
         | 
| 99 | 
             
                  name: Open LLM Leaderboard
         | 
| 100 | 
             
              - task:
         | 
| 101 | 
             
                  type: text-generation
         | 
| @@ -112,14 +112,8 @@ model-index: | |
| 112 | 
             
                  value: 46.14
         | 
| 113 | 
             
                  name: accuracy
         | 
| 114 | 
             
                source:
         | 
| 115 | 
            -
                  url:  | 
| 116 | 
            -
                    https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=sthenno-com/miscii-14b-1028
         | 
| 117 | 
             
                  name: Open LLM Leaderboard
         | 
| 118 | 
            -
            datasets:
         | 
| 119 | 
            -
            - nvidia/HelpSteer2
         | 
| 120 | 
            -
            - google/Synthetic-Persona-Chat
         | 
| 121 | 
            -
            - mlabonne/orpo-dpo-mix-40k
         | 
| 122 | 
            -
            new_version: sthenno-com/miscii-14b-1225
         | 
| 123 | 
             
            ---
         | 
| 124 |  | 
| 125 | 
             
            # miscii-14b-1028
         | 
| @@ -167,4 +161,17 @@ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-le | |
| 167 | 
             
            |MuSR (0-shot)      |12.00|
         | 
| 168 | 
             
            |MMLU-PRO (5-shot)  |46.14|
         | 
| 169 |  | 
| 170 | 
            -
            $$\large{\text{There's nothing more to Show}}$$
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 13 | 
             
            - tool-use
         | 
| 14 | 
             
            base_model:
         | 
| 15 | 
             
            - Qwen/Qwen2.5-14B-Instruct
         | 
| 16 | 
            +
            datasets:
         | 
| 17 | 
            +
            - nvidia/HelpSteer2
         | 
| 18 | 
            +
            - google/Synthetic-Persona-Chat
         | 
| 19 | 
            +
            - mlabonne/orpo-dpo-mix-40k
         | 
| 20 | 
             
            pipeline_tag: text-generation
         | 
| 21 | 
            +
            new_version: sthenno-com/miscii-14b-1225
         | 
| 22 | 
             
            model-index:
         | 
| 23 | 
             
            - name: miscii-14b-1028
         | 
| 24 | 
             
              results:
         | 
|  | |
| 35 | 
             
                  value: 82.37
         | 
| 36 | 
             
                  name: strict accuracy
         | 
| 37 | 
             
                source:
         | 
| 38 | 
            +
                  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=sthenno-com/miscii-14b-1028
         | 
|  | |
| 39 | 
             
                  name: Open LLM Leaderboard
         | 
| 40 | 
             
              - task:
         | 
| 41 | 
             
                  type: text-generation
         | 
|  | |
| 50 | 
             
                  value: 49.26
         | 
| 51 | 
             
                  name: normalized accuracy
         | 
| 52 | 
             
                source:
         | 
| 53 | 
            +
                  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=sthenno-com/miscii-14b-1028
         | 
|  | |
| 54 | 
             
                  name: Open LLM Leaderboard
         | 
| 55 | 
             
              - task:
         | 
| 56 | 
             
                  type: text-generation
         | 
|  | |
| 65 | 
             
                  value: 6.34
         | 
| 66 | 
             
                  name: exact match
         | 
| 67 | 
             
                source:
         | 
| 68 | 
            +
                  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=sthenno-com/miscii-14b-1028
         | 
|  | |
| 69 | 
             
                  name: Open LLM Leaderboard
         | 
| 70 | 
             
              - task:
         | 
| 71 | 
             
                  type: text-generation
         | 
|  | |
| 80 | 
             
                  value: 14.21
         | 
| 81 | 
             
                  name: acc_norm
         | 
| 82 | 
             
                source:
         | 
| 83 | 
            +
                  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=sthenno-com/miscii-14b-1028
         | 
|  | |
| 84 | 
             
                  name: Open LLM Leaderboard
         | 
| 85 | 
             
              - task:
         | 
| 86 | 
             
                  type: text-generation
         | 
|  | |
| 95 | 
             
                  value: 12
         | 
| 96 | 
             
                  name: acc_norm
         | 
| 97 | 
             
                source:
         | 
| 98 | 
            +
                  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=sthenno-com/miscii-14b-1028
         | 
|  | |
| 99 | 
             
                  name: Open LLM Leaderboard
         | 
| 100 | 
             
              - task:
         | 
| 101 | 
             
                  type: text-generation
         | 
|  | |
| 112 | 
             
                  value: 46.14
         | 
| 113 | 
             
                  name: accuracy
         | 
| 114 | 
             
                source:
         | 
| 115 | 
            +
                  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=sthenno-com/miscii-14b-1028
         | 
|  | |
| 116 | 
             
                  name: Open LLM Leaderboard
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 117 | 
             
            ---
         | 
| 118 |  | 
| 119 | 
             
            # miscii-14b-1028
         | 
|  | |
| 161 | 
             
            |MuSR (0-shot)      |12.00|
         | 
| 162 | 
             
            |MMLU-PRO (5-shot)  |46.14|
         | 
| 163 |  | 
| 164 | 
            +
            $$\large{\text{There's nothing more to Show}}$$
         | 
| 165 | 
            +
            # [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard)
         | 
| 166 | 
            +
            Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/sthenno-com__miscii-14b-1028-details)
         | 
| 167 | 
            +
             | 
| 168 | 
            +
            |      Metric       |Value|
         | 
| 169 | 
            +
            |-------------------|----:|
         | 
| 170 | 
            +
            |Avg.               |42.38|
         | 
| 171 | 
            +
            |IFEval (0-Shot)    |82.37|
         | 
| 172 | 
            +
            |BBH (3-Shot)       |49.26|
         | 
| 173 | 
            +
            |MATH Lvl 5 (4-Shot)|50.30|
         | 
| 174 | 
            +
            |GPQA (0-shot)      |14.21|
         | 
| 175 | 
            +
            |MuSR (0-shot)      |12.00|
         | 
| 176 | 
            +
            |MMLU-PRO (5-shot)  |46.14|
         | 
| 177 | 
            +
             | 

