Spaces:
Running
Running
update scripts
Browse files- app.py +12 -6
- results/SeaExam_results_0419.csv +0 -46
app.py
CHANGED
@@ -1,12 +1,18 @@
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
3 |
import os
|
|
|
4 |
|
5 |
-
#
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
# Load the CSV file
|
12 |
def load_csv(file_path):
|
@@ -14,7 +20,7 @@ def load_csv(file_path):
|
|
14 |
return data
|
15 |
|
16 |
# Example path to your CSV file
|
17 |
-
csv_path = '
|
18 |
data = load_csv(csv_path)
|
19 |
|
20 |
def show_data():
|
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
3 |
import os
|
4 |
+
from huggingface_hub import snapshot_download
|
5 |
|
6 |
+
# clone / pull the lmeh eval data
|
7 |
+
TOKEN = os.environ.get("TOKEN", None)
|
8 |
+
RESULTS_REPO = f"lukecq/SeaExam-results"
|
9 |
+
CACHE_PATH=os.getenv("HF_HOME", ".")
|
10 |
+
EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
|
11 |
+
print(EVAL_RESULTS_PATH)
|
12 |
+
snapshot_download(
|
13 |
+
repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset",
|
14 |
+
token=TOKEN
|
15 |
+
)
|
16 |
|
17 |
# Load the CSV file
|
18 |
def load_csv(file_path):
|
|
|
20 |
return data
|
21 |
|
22 |
# Example path to your CSV file
|
23 |
+
csv_path = f'{EVAL_RESULTS_PATH}/SeaExam_results_0419.csv'
|
24 |
data = load_csv(csv_path)
|
25 |
|
26 |
def show_data():
|
results/SeaExam_results_0419.csv
DELETED
@@ -1,46 +0,0 @@
|
|
1 |
-
,,,,M3Exam,,,,,,
|
2 |
-
Model,type,open?,shot,en,zh,id,th,vi,avg,avg_sea
|
3 |
-
gpt4-1106,chat,N,0,0.877,0.789,0.649,0.69,0.708,0.742,0.682
|
4 |
-
Meta-Llama-3-70B,base,Y,3,0.844,0.756,0.619,0.662,0.683,0.713,0.654
|
5 |
-
Meta-Llama-3-70B-Instruct,chat,Y,3,0.863,0.694,0.63,0.643,0.684,0.703,0.652
|
6 |
-
Qwen1.5-72B,base,Y,3,0.839,0.925,0.587,0.568,0.648,0.713,0.601
|
7 |
-
claude-3-sonnet-20240229,chat,N,0,0.789,0.683,0.585,0.571,0.626,0.651,0.594
|
8 |
-
claude-3-haiku-20240307,chat,N,0,0.79,0.652,0.563,0.573,0.631,0.642,0.589
|
9 |
-
dbrx-base,base,Y,3,0.808,0.689,0.534,0.507,0.605,0.629,0.548
|
10 |
-
Mixtral-8x22B-v0.1,base,Y,3,0.839,0.696,0.57,0.487,0.601,0.639,0.553
|
11 |
-
SeaLLM-7B-v2.5,chat,Y,3,0.759,0.602,0.501,0.507,0.618,0.597,0.542
|
12 |
-
Qwen1.5-14B,base,Y,3,0.797,0.862,0.527,0.478,0.549,0.643,0.518
|
13 |
-
gemini-1.0-pro,chat,N,0,0.569,0.725,0.44,0.492,0.605,0.566,0.513
|
14 |
-
gemma-7b,base,Y,3,0.731,0.528,0.465,0.463,0.597,0.557,0.508
|
15 |
-
gpt-3.5-turbo-0125,chat,N,3,0.751,0.589,0.5,0.389,0.534,0.552,0.474
|
16 |
-
Mixtral-8x7B-v0.1,base,Y,3,0.771,0.606,0.48,0.435,0.522,0.563,0.479
|
17 |
-
Llama-2-70b-hf,base,Y,3,0.749,0.599,0.492,0.345,0.559,0.549,0.465
|
18 |
-
Meta-Llama-3-8B,base,Y,3,0.7,0.54,0.427,0.454,0.509,0.526,0.463
|
19 |
-
Sailor-7B-Chat,chat,Y,3,0.656,0.651,0.474,0.464,0.512,0.551,0.483
|
20 |
-
gpt-3.5-turbo-0125,chat,N,0,0.756,0.606,0.493,0.397,0.529,0.556,0.473
|
21 |
-
Yi-34B,base,Y,3,0.815,0.86,0.541,0.381,0.502,0.62,0.475
|
22 |
-
Meta-Llama-3-8B-Instruct,chat,Y,3,0.725,0.537,0.466,0.371,0.509,0.522,0.449
|
23 |
-
SeaLLM-7B-v2,chat,Y,3,0.702,0.516,0.432,0.406,0.515,0.514,0.451
|
24 |
-
Sailor-7B,base,Y,3,0.611,0.632,0.443,0.41,0.499,0.519,0.451
|
25 |
-
Qwen1.5-7B-Chat,chat,Y,3,0.646,0.627,0.43,0.398,0.492,0.519,0.44
|
26 |
-
Yi-9B,base,Y,3,0.775,0.792,0.492,0.357,0.453,0.574,0.434
|
27 |
-
Qwen1.5-7B,base,Y,3,0.721,0.811,0.441,0.361,0.45,0.557,0.417
|
28 |
-
Mistral-7B-v0.1,base,Y,3,0.677,0.497,0.422,0.346,0.409,0.47,0.392
|
29 |
-
gemma-7b-it,chat,Y,3,0.622,0.427,0.373,0.321,0.467,0.442,0.387
|
30 |
-
Mistral-7B-Instruct-v0.2,chat,Y,3,0.657,0.495,0.404,0.304,0.399,0.452,0.369
|
31 |
-
Qwen1.5-4B,base,Y,3,0.664,0.772,0.351,0.319,0.389,0.499,0.353
|
32 |
-
Yi-6B,base,Y,3,0.704,0.809,0.411,0.298,0.37,0.519,0.36
|
33 |
-
Llama-2-13b-hf,base,Y,3,0.605,0.365,0.384,0.288,0.409,0.41,0.36
|
34 |
-
Llama-2-13b-chat-hf,chat,Y,3,0.589,0.382,0.372,0.288,0.39,0.404,0.35
|
35 |
-
Qwen1.5-MoE-A2.7B,base,Y,3,0.628,0.789,0.366,0.254,0.402,0.488,0.341
|
36 |
-
gemma-2b-it,chat,Y,3,0.439,0.377,0.316,0.284,0.357,0.355,0.319
|
37 |
-
Llama-2-7b-chat-hf,chat,Y,3,0.566,0.326,0.341,0.268,0.34,0.368,0.317
|
38 |
-
bloomz-7b1,chat,Y,3,0.431,0.377,0.361,0.256,0.356,0.356,0.325
|
39 |
-
gemma-2b,base,Y,3,0.417,0.275,0.304,0.286,0.316,0.32,0.302
|
40 |
-
Llama-2-7b-hf,base,Y,3,0.491,0.323,0.308,0.263,0.317,0.341,0.296
|
41 |
-
Qwen1.5-1.8B,base,Y,3,0.546,0.713,0.326,0.244,0.324,0.43,0.298
|
42 |
-
Qwen1.5-0.5B,base,Y,3,0.446,0.61,0.294,0.26,0.297,0.381,0.284
|
43 |
-
sea-lion-7b-instruct,chat,Y,3,0.27,0.273,0.287,0.264,0.269,0.273,0.273
|
44 |
-
sea-lion-7b,base,Y,3,0.245,0.228,0.254,0.264,0.241,0.247,0.253
|
45 |
-
phi-2,base,Y,3,0.582,0.286,0.295,0.21,0.269,0.328,0.258
|
46 |
-
bloom-7b1,base,Y,3,0.227,0.183,0.253,0.24,0.243,0.229,0.246
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|