kt-test-account committed on
Commit
1ed31e5
·
1 Parent(s): 1948bd1

add aug heatmaps

Browse files
Files changed (6) hide show
  1. app.py +11 -4
  2. metric.py +49 -16
  3. process_data.py +51 -25
  4. task2.csv +11 -11
  5. task3.csv +10 -10
  6. updated.txt +1 -1
app.py CHANGED
@@ -178,7 +178,7 @@ def make_acc(results):
178
 
179
 
180
  @st.cache_data
181
- def make_heatmap(results,label = "generated", symbol = "🤖"):
182
 
183
 
184
  # Assuming df is your wide-format DataFrame (models as rows, datasets as columns)
@@ -186,6 +186,10 @@ def make_heatmap(results,label = "generated", symbol = "🤖"):
186
  team_order = results.index.tolist()
187
  df_long = df_long.loc[:,[c for c in df_long.columns if c.startswith(label) and "accuracy" not in c]]
188
  df_long.columns = [c.replace(f"{label}_","") for c in df_long.columns]
 
 
 
 
189
  df_long = df_long.reset_index().melt(id_vars='team', var_name='source', value_name='acc')
190
  # df_long.rename(columns={'index': 'source'}, inplace=True)
191
  # df_long
@@ -216,17 +220,20 @@ def make_heatmap(results,label = "generated", symbol = "🤖"):
216
  chart = (heatmap + text).properties(
217
  width=600,
218
  height=500,
219
- title=f'Accuracy on {symbol} {label} sources heatmap'
220
  )
221
 
222
  return chart
223
 
224
 
225
  def get_heatmaps(temp):
226
- h1 = make_heatmap(temp, "generated", symbol = "🤖")
227
- h2 = make_heatmap(temp, "pristine", symbol = "🧑‍🎤")
228
  st.altair_chart(h1, use_container_width=True)
229
  st.altair_chart(h2, use_container_width=True)
 
 
 
230
 
231
  def make_plots_for_task(task,split,best_only):
232
  # results = load_results(task, best_only=best_only)
 
178
 
179
 
180
  @st.cache_data
181
+ def make_heatmap(results,label = "generated", symbol = "🤖", title = ""):
182
 
183
 
184
  # Assuming df is your wide-format DataFrame (models as rows, datasets as columns)
 
186
  team_order = results.index.tolist()
187
  df_long = df_long.loc[:,[c for c in df_long.columns if c.startswith(label) and "accuracy" not in c]]
188
  df_long.columns = [c.replace(f"{label}_","") for c in df_long.columns]
189
+
190
+ if "none" in df_long.columns:
191
+ df_long = df_long.drop(columns=["none"])
192
+
193
  df_long = df_long.reset_index().melt(id_vars='team', var_name='source', value_name='acc')
194
  # df_long.rename(columns={'index': 'source'}, inplace=True)
195
  # df_long
 
220
  chart = (heatmap + text).properties(
221
  width=600,
222
  height=500,
223
+ title=title
224
  )
225
 
226
  return chart
227
 
228
 
229
  def get_heatmaps(temp):
230
+ h1 = make_heatmap(temp, "generated", symbol = "🤖", title = "Accuracy by 🤖 geneated source")
231
+ h2 = make_heatmap(temp, "pristine", symbol = "🧑‍🎤", title = "Accuracy by 🧑‍🎤 pristine source")
232
  st.altair_chart(h1, use_container_width=True)
233
  st.altair_chart(h2, use_container_width=True)
234
+ if temp.columns.str.contains("aug", case=False).any():
235
+ h3 = make_heatmap(temp, "aug", symbol="🛠️", title = "Accuracy by 🛠️ augmentation method on 🤖 geneated data only")
236
+ st.altair_chart(h3, use_container_width=True)
237
 
238
  def make_plots_for_task(task,split,best_only):
239
  # results = load_results(task, best_only=best_only)
metric.py CHANGED
@@ -1,9 +1,7 @@
1
  import pandas as pd
2
  from huggingface_hub import hf_hub_download
3
- import json
4
 
5
-
6
- def _metric(solution_df,submission_df, mode = "top_level", admin = False):
7
  """
8
  This function calculates the accuracy of the generated predictions.
9
 
@@ -32,41 +30,76 @@ def _metric(solution_df,submission_df, mode = "top_level", admin = False):
32
 
33
  cols = ["split","pred", source_col]
34
 
35
-
36
  solution_df["correct"] = solution_df["pred"] == solution_df["submission_pred"]
37
- accuracy = solution_df.groupby(cols)["correct"].mean().to_frame("accuracy").reset_index()
38
- accuracy["score_name"] = accuracy["pred"] +"_"+ accuracy[source_col]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
 
40
  evaluation = {}
41
 
42
  split = "public"
43
 
44
- temp = accuracy.query(f"split=='{split}'")
45
- scores_by_source = temp.set_index("score_name")["accuracy"].sort_index()
46
- scores_by_source["generated_accuracy"] = temp.query("pred=='generated'")["accuracy"].mean()
47
- scores_by_source["pristine_accuracy"] = temp.query("pred=='pristine'")["accuracy"].mean()
48
- scores_by_source["balanced_accuracy"] = (scores_by_source["generated_accuracy"] + scores_by_source["pristine_accuracy"])/2.
 
49
 
50
 
51
  if mode == "top_level":
52
  scores_to_save = ["generated_accuracy", "pristine_accuracy", "balanced_accuracy"]
53
  evaluation[f"{split}_score"] = scores_by_source.loc[scores_to_save].to_dict()
54
  else:
 
 
 
 
55
  evaluation[f"{split}_score"] = scores_by_source.to_dict()
56
 
57
  split = "private"
58
  # private has everything
59
 
60
- temp = accuracy
61
- scores_by_source = temp.set_index("score_name")["accuracy"].sort_index()
62
- scores_by_source["generated_accuracy"] = temp.query("pred=='generated'")["accuracy"].mean()
63
- scores_by_source["pristine_accuracy"] = temp.query("pred=='pristine'")["accuracy"].mean()
64
- scores_by_source["balanced_accuracy"] = (scores_by_source["generated_accuracy"] + scores_by_source["pristine_accuracy"])/2.
 
 
65
 
66
  if mode == "top_level":
67
  scores_to_save = ["generated_accuracy", "pristine_accuracy", "balanced_accuracy"]
68
  evaluation[f"{split}_score"] = scores_by_source.loc[scores_to_save].to_dict()
69
  else:
 
 
 
 
70
  evaluation[f"{split}_score"] = scores_by_source.to_dict()
71
 
72
 
 
1
  import pandas as pd
2
  from huggingface_hub import hf_hub_download
 
3
 
4
+ def _metric(solution_df,submission_df, mode = "top_level", admin = False, additional_columns = None):
 
5
  """
6
  This function calculates the accuracy of the generated predictions.
7
 
 
30
 
31
  cols = ["split","pred", source_col]
32
 
 
33
  solution_df["correct"] = solution_df["pred"] == solution_df["submission_pred"]
34
+ acc_all = (
35
+ solution_df.groupby(cols)["correct"].mean().reset_index()
36
+ .rename(columns={"correct": "accuracy"})
37
+ )
38
+ acc_all["score_name"] = acc_all["pred"] + "_" + acc_all[source_col]
39
+
40
+ if additional_columns == None:
41
+ additional_columns = []
42
+
43
+ if not admin:
44
+ # anonymize
45
+ for c in additional_columns:
46
+ vals_lookup = pd.Series({v:f"{c[:1]}_{i:02}" for i,v in enumerate(sorted(solution_df[c].unique()))})
47
+ solution_df.loc[:,c] = vals_lookup.loc[solution_df.loc[:,c].values].values
48
+
49
+ def acc_by_additional_columns(temp, col):
50
+ temp = temp.groupby(col)["correct"].mean().reset_index().rename(columns={"correct": "accuracy"})
51
+ temp["score_name"] = col[:3] + "_" + temp[col]
52
+ return temp.set_index("score_name")["accuracy"].sort_index()
53
+
54
+ def acc_by_source(temp):
55
+ scores_by_source = temp.set_index("score_name")["accuracy"].sort_index()
56
+ scores_by_source["generated_accuracy"] = temp.query("pred=='generated'")["accuracy"].mean()
57
+ scores_by_source["pristine_accuracy"] = temp.query("pred=='pristine'")["accuracy"].mean()
58
+ scores_by_source["balanced_accuracy"] = (scores_by_source["generated_accuracy"] + scores_by_source["pristine_accuracy"])/2.
59
+ return scores_by_source
60
 
61
+
62
  evaluation = {}
63
 
64
  split = "public"
65
 
66
+ temp = acc_all.query(f"split=='{split}'")
67
+ scores_by_source = acc_by_source(temp)
68
+ # scores_by_source = temp.set_index("score_name")["accuracy"].sort_index()
69
+ # scores_by_source["generated_accuracy"] = temp.query("pred=='generated'")["accuracy"].mean()
70
+ # scores_by_source["pristine_accuracy"] = temp.query("pred=='pristine'")["accuracy"].mean()
71
+ # scores_by_source["balanced_accuracy"] = (scores_by_source["generated_accuracy"] + scores_by_source["pristine_accuracy"])/2.
72
 
73
 
74
  if mode == "top_level":
75
  scores_to_save = ["generated_accuracy", "pristine_accuracy", "balanced_accuracy"]
76
  evaluation[f"{split}_score"] = scores_by_source.loc[scores_to_save].to_dict()
77
  else:
78
+ out = [scores_by_source]
79
+ for col in additional_columns:
80
+ out.append(acc_by_additional_columns(solution_df.query(f"split=='{split}'"),col))
81
+ scores_by_source = pd.concat(out)
82
  evaluation[f"{split}_score"] = scores_by_source.to_dict()
83
 
84
  split = "private"
85
  # private has everything
86
 
87
+ temp = acc_all
88
+ scores_by_source = acc_by_source(temp)
89
+
90
+ # scores_by_source = temp.set_index("score_name")["accuracy"].sort_index()
91
+ # scores_by_source["generated_accuracy"] = temp.query("pred=='generated'")["accuracy"].mean()
92
+ # scores_by_source["pristine_accuracy"] = temp.query("pred=='pristine'")["accuracy"].mean()
93
+ # scores_by_source["balanced_accuracy"] = (scores_by_source["generated_accuracy"] + scores_by_source["pristine_accuracy"])/2.
94
 
95
  if mode == "top_level":
96
  scores_to_save = ["generated_accuracy", "pristine_accuracy", "balanced_accuracy"]
97
  evaluation[f"{split}_score"] = scores_by_source.loc[scores_to_save].to_dict()
98
  else:
99
+ out = [scores_by_source]
100
+ for col in additional_columns:
101
+ out.append(acc_by_additional_columns(solution_df,col))
102
+ scores_by_source = pd.concat(out)
103
  evaluation[f"{split}_score"] = scores_by_source.to_dict()
104
 
105
 
process_data.py CHANGED
@@ -19,6 +19,7 @@ def get_submission(f):
19
 
20
  return submissions
21
 
 
22
  # def get_submissions_file(f):
23
  # submission_df = pd.read_csv(f).set_index("id")
24
  # if isinstance(submission_df.iloc[0]["score"],str):
@@ -31,11 +32,14 @@ def get_submissions_file(f):
31
  if isinstance(submission_df.iloc[0]["score"], str):
32
  submission_df.loc[:, "score"] = submission_df.loc[:, "score"].apply(
33
  lambda a: float(
34
- np.array(json.loads(re.sub(r"\b(\d+)\.(?!\d)", r"\1.0", a))).squeeze() if isinstance(a,str) else float("nan")
 
 
35
  )
36
  )
37
  return submission_df
38
 
 
39
  def load_results(local_dir):
40
  team_file_name = "teams.json"
41
  team_info = pd.read_json(Path(local_dir) / team_file_name).T
@@ -73,7 +77,7 @@ def load_results(local_dir):
73
 
74
  def compute_metrics(submissions, local_dir, admin=True):
75
 
76
- submissions=submissions.query("status==3.0")
77
 
78
  # if not admin:
79
  # selected_by_team = submissions.groupby("team")["selected"].sum()
@@ -89,7 +93,15 @@ def compute_metrics(submissions, local_dir, admin=True):
89
  for i, row in submissions.T.items():
90
  # r = pd.read_csv(row["submission_files"]).set_index("id")
91
  r = get_submissions_file(row["submission_files"])
92
- eval = _metric(solution_df, r, mode="detailed", admin=admin)
 
 
 
 
 
 
 
 
93
  for m in ["private_score", "public_score"]:
94
  for f in fields:
95
  eval[m][f] = row[f]
@@ -130,35 +142,52 @@ def compute_metrics(submissions, local_dir, admin=True):
130
 
131
  results[m] = results[m].set_index("submission" if admin else "team")
132
 
 
 
 
 
 
 
 
133
 
134
- fields_to_merge = ['generated_accuracy', 'pristine_accuracy', 'balanced_accuracy', 'total_time', 'fail_rate']
 
 
 
 
 
 
 
 
 
135
 
136
- submissions = pd.concat([submissions.set_index("submission_id"),
137
- results["private_score"].reset_index().set_index("submission_id").loc[:,fields_to_merge]],axis = 1).reset_index()
138
-
139
  return results, submissions
140
 
141
 
142
- status_lookup="NA,QUEUED,PROCESSING,SUCCESS,FAILED".split(",")
143
 
144
- def process_data(path,save_path):
 
145
  submissions = load_results(path)
146
  submissions["datetime"] = pd.DatetimeIndex(submissions["datetime"])
147
  submissions["date"] = submissions["datetime"].dt.date
148
- submissions["status_reason"] = submissions["status"].astype(int).apply(lambda a: status_lookup[a])
149
- submissions.loc[:,["submission_id","datetime","date","status","status_reason"]].to_csv(save_path + "_submissions.csv")
 
 
 
 
150
 
151
- results,submissions = compute_metrics(submissions, path, admin=False)
152
- cols_to_drop = ["team_id","submission_id","submission_repo","submission"]
153
- results["public_score"].drop(columns =cols_to_drop).to_csv(save_path+".csv")
154
-
155
 
156
 
157
- if __name__=="__main__":
158
- process_data("comp_data_task1","task1")
159
- process_data("comp_data_task2","task2")
160
- process_data("comp_data_task3","task3")
161
- process_data("comp_data_practice","practice")
162
 
163
  # from datetime import date
164
 
@@ -168,19 +197,16 @@ if __name__=="__main__":
168
  # # Print date in YYYY-MM-DD format
169
  # print("Today's date:", today)
170
 
171
-
172
-
173
  from datetime import datetime
174
  import pytz
175
 
176
  # Define EST timezone
177
- est = pytz.timezone('US/Eastern')
178
 
179
  # Get current time in EST
180
  est_time = datetime.now(est)
181
 
182
  # Print current date and time in EST
183
  today = f"Updated on {est_time.strftime('%Y-%m-%d %H:%M:%S')} EST"
184
- with open("updated.txt","w") as f:
185
  f.write(str(today))
186
-
 
19
 
20
  return submissions
21
 
22
+
23
  # def get_submissions_file(f):
24
  # submission_df = pd.read_csv(f).set_index("id")
25
  # if isinstance(submission_df.iloc[0]["score"],str):
 
32
  if isinstance(submission_df.iloc[0]["score"], str):
33
  submission_df.loc[:, "score"] = submission_df.loc[:, "score"].apply(
34
  lambda a: float(
35
+ np.array(json.loads(re.sub(r"\b(\d+)\.(?!\d)", r"\1.0", a))).squeeze()
36
+ if isinstance(a, str)
37
+ else float("nan")
38
  )
39
  )
40
  return submission_df
41
 
42
+
43
  def load_results(local_dir):
44
  team_file_name = "teams.json"
45
  team_info = pd.read_json(Path(local_dir) / team_file_name).T
 
77
 
78
  def compute_metrics(submissions, local_dir, admin=True):
79
 
80
+ submissions = submissions.query("status==3.0")
81
 
82
  # if not admin:
83
  # selected_by_team = submissions.groupby("team")["selected"].sum()
 
93
  for i, row in submissions.T.items():
94
  # r = pd.read_csv(row["submission_files"]).set_index("id")
95
  r = get_submissions_file(row["submission_files"])
96
+ eval = _metric(
97
+ solution_df,
98
+ r,
99
+ mode="detailed",
100
+ admin=admin,
101
+ additional_columns=(
102
+ ["augmentation"] if "augmentation" in solution_df.columns else None
103
+ ),
104
+ )
105
  for m in ["private_score", "public_score"]:
106
  for f in fields:
107
  eval[m][f] = row[f]
 
142
 
143
  results[m] = results[m].set_index("submission" if admin else "team")
144
 
145
+ fields_to_merge = [
146
+ "generated_accuracy",
147
+ "pristine_accuracy",
148
+ "balanced_accuracy",
149
+ "total_time",
150
+ "fail_rate",
151
+ ]
152
 
153
+ submissions = pd.concat(
154
+ [
155
+ submissions.set_index("submission_id"),
156
+ results["private_score"]
157
+ .reset_index()
158
+ .set_index("submission_id")
159
+ .loc[:, fields_to_merge],
160
+ ],
161
+ axis=1,
162
+ ).reset_index()
163
 
 
 
 
164
  return results, submissions
165
 
166
 
167
+ status_lookup = "NA,QUEUED,PROCESSING,SUCCESS,FAILED".split(",")
168
 
169
+
170
+ def process_data(path, save_path):
171
  submissions = load_results(path)
172
  submissions["datetime"] = pd.DatetimeIndex(submissions["datetime"])
173
  submissions["date"] = submissions["datetime"].dt.date
174
+ submissions["status_reason"] = (
175
+ submissions["status"].astype(int).apply(lambda a: status_lookup[a])
176
+ )
177
+ submissions.loc[
178
+ :, ["submission_id", "datetime", "date", "status", "status_reason"]
179
+ ].to_csv(save_path + "_submissions.csv")
180
 
181
+ results, submissions = compute_metrics(submissions, path, admin=False)
182
+ cols_to_drop = ["team_id", "submission_id", "submission_repo", "submission"]
183
+ results["public_score"].drop(columns=cols_to_drop).to_csv(save_path + ".csv")
 
184
 
185
 
186
+ if __name__ == "__main__":
187
+ process_data("comp_data_task1", "task1")
188
+ process_data("comp_data_task2", "task2")
189
+ process_data("comp_data_task3", "task3")
190
+ process_data("comp_data_practice", "practice")
191
 
192
  # from datetime import date
193
 
 
197
  # # Print date in YYYY-MM-DD format
198
  # print("Today's date:", today)
199
 
 
 
200
  from datetime import datetime
201
  import pytz
202
 
203
  # Define EST timezone
204
+ est = pytz.timezone("US/Eastern")
205
 
206
  # Get current time in EST
207
  est_time = datetime.now(est)
208
 
209
  # Print current date and time in EST
210
  today = f"Updated on {est_time.strftime('%Y-%m-%d %H:%M:%S')} EST"
211
+ with open("updated.txt", "w") as f:
212
  f.write(str(today))
 
task2.csv CHANGED
@@ -1,11 +1,11 @@
1
- team,generated_g_02,generated_g_04,generated_g_05,generated_g_06,generated_g_09,generated_g_10,generated_g_11,pristine_p_00,pristine_p_01,pristine_p_02,pristine_p_05,pristine_p_09,pristine_p_10,pristine_p_11,pristine_p_16,pristine_p_18,pristine_p_20,generated_accuracy,pristine_accuracy,balanced_accuracy,total_time,fail_rate
2
- baseline-2,0.8789473684210526,0.9394736842105263,0.6868421052631579,0.9394736842105263,0.9078947368421053,0.9263157894736842,0.8236842105263158,0.9,0.66,0.895,0.915,0.935,0.68,0.815,0.85,0.88,0.945,0.8718045112781956,0.8474999999999999,0.8596522556390978,1558.1854865550995,0.0
3
- baseline-1,0.9210526315789473,0.9868421052631579,0.7684210526315789,0.9289473684210526,0.9763157894736842,0.9815789473684211,0.9157894736842105,0.96,0.36,0.955,0.815,1.0,0.69,0.29,0.635,0.885,0.955,0.925563909774436,0.7545,0.8400319548872179,2615.167044878006,0.008583690987124463
4
- ISPL,0.7157894736842105,0.868421052631579,0.48947368421052634,0.9578947368421052,0.8026315789473685,0.7526315789473684,0.7552631578947369,0.995,0.505,0.995,0.83,0.995,0.775,1.0,0.795,0.97,0.955,0.7631578947368421,0.8815,0.822328947368421,119.12918734550453,0.0
5
- Anon_Peking,0.9078947368421053,0.6605263157894737,0.6868421052631579,0.9210526315789473,0.881578947368421,0.868421052631579,0.5736842105263158,1.0,0.26,0.75,0.575,0.86,0.73,0.955,0.89,0.91,0.995,0.7857142857142856,0.7925,0.7891071428571428,256.57893514633156,0.0
6
- viper-purdue,0.7447368421052631,0.15526315789473685,0.05526315789473684,0.6736842105263158,0.5842105263157895,0.7578947368421053,0.18421052631578946,1.0,0.975,0.995,0.985,0.94,0.965,0.99,0.94,0.99,0.99,0.45075187969924807,0.977,0.713875939849624,454.11514687538124,0.0
7
- JAIST-HIS,0.46842105263157896,0.9473684210526315,0.9421052631578948,0.4394736842105263,0.6210526315789474,0.9842105263157894,0.3105263157894737,0.645,0.7,0.98,0.645,0.61,0.99,0.12,0.845,0.78,0.93,0.6733082706766919,0.7245,0.6989041353383459,602.246269226074,0.0
8
- csun22,0.4842105263157895,0.618421052631579,0.7631578947368421,0.4710526315789474,0.3763157894736842,0.32105263157894737,0.47368421052631576,0.59,0.995,0.92,0.535,0.755,0.61,0.405,0.485,0.53,0.4,0.5011278195488722,0.6225,0.5618139097744361,112.02455472946144,0.0
9
- DMF,0.95,0.8842105263157894,0.9578947368421052,0.9868421052631579,0.8710526315789474,0.9736842105263158,0.9921052631578947,0.26,0.0,0.005,0.05,0.45,0.08,0.01,0.225,0.265,0.05,0.9451127819548872,0.1395,0.5423063909774436,94.64070606231667,0.0
10
- safe-test,0.49736842105263157,0.4710526315789474,0.4789473684210526,0.4842105263157895,0.5236842105263158,0.5026315789473684,0.49473684210526314,0.5,0.455,0.495,0.495,0.505,0.515,0.505,0.535,0.51,0.51,0.4932330827067669,0.5025,0.49786654135338343,200.45346903800942,0.0
11
- gylin,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1,1.0
 
1
+ team,generated_g_02,generated_g_04,generated_g_05,generated_g_06,generated_g_09,generated_g_10,generated_g_11,pristine_p_00,pristine_p_01,pristine_p_02,pristine_p_05,pristine_p_09,pristine_p_10,pristine_p_11,pristine_p_16,pristine_p_18,pristine_p_20,generated_accuracy,pristine_accuracy,balanced_accuracy,aug_a_00,aug_a_01,aug_a_02,aug_a_03,aug_a_04,aug_a_05,aug_a_06,aug_a_07,aug_a_08,aug_a_09,aug_a_10,aug_a_11,aug_a_12,aug_a_13,aug_a_14,aug_a_15,aug_a_16,aug_a_17,aug_a_18,aug_a_19,total_time,fail_rate
2
+ baseline-2,0.8789473684210526,0.9394736842105263,0.6868421052631579,0.9394736842105263,0.9078947368421053,0.9263157894736842,0.8236842105263158,0.9,0.66,0.895,0.915,0.935,0.68,0.815,0.85,0.88,0.945,0.8718045112781956,0.8474999999999999,0.8596522556390978,0.9214285714285714,0.9785714285714285,0.9928571428571429,0.8714285714285714,0.8857142857142857,0.8571428571428571,0.9142857142857143,0.45714285714285713,0.8475,0.8142857142857143,0.8428571428571429,0.7285714285714285,0.9,0.9071428571428571,1.0,1.0,0.9214285714285714,0.7642857142857142,0.9,0.9071428571428571,1558.1854865550995,0.0
3
+ baseline-1,0.9210526315789473,0.9868421052631579,0.7684210526315789,0.9289473684210526,0.9763157894736842,0.9815789473684211,0.9157894736842105,0.96,0.36,0.955,0.815,1.0,0.69,0.29,0.635,0.885,0.955,0.925563909774436,0.7545,0.8400319548872179,0.9928571428571429,0.9928571428571429,1.0,1.0,1.0,0.9357142857142857,0.9571428571428572,0.7928571428571428,0.7545,0.9928571428571429,0.85,1.0,0.9214285714285714,0.8857142857142857,1.0,1.0,0.5071428571428571,1.0,0.8857142857142857,0.8714285714285714,2615.167044878006,0.008583690987124463
4
+ ISPL,0.7157894736842105,0.868421052631579,0.48947368421052634,0.9578947368421052,0.8026315789473685,0.7526315789473684,0.7552631578947369,0.995,0.505,0.995,0.83,0.995,0.775,1.0,0.795,0.97,0.955,0.7631578947368421,0.8815,0.822328947368421,0.7285714285714285,0.9857142857142858,0.9928571428571429,0.4714285714285714,0.6214285714285714,0.8428571428571429,0.8642857142857143,0.4642857142857143,0.8815,0.6357142857142857,0.6428571428571429,0.7857142857142857,0.7571428571428571,0.8785714285714286,0.8571428571428571,0.65,0.7357142857142858,0.8,0.8857142857142857,0.9,119.12918734550453,0.0
5
+ Anon_Peking,0.9078947368421053,0.6605263157894737,0.6868421052631579,0.9210526315789473,0.881578947368421,0.868421052631579,0.5736842105263158,1.0,0.26,0.75,0.575,0.86,0.73,0.955,0.89,0.91,0.995,0.7857142857142856,0.7925,0.7891071428571428,0.8857142857142857,0.9642857142857143,0.8785714285714286,0.9357142857142857,0.8928571428571429,0.8214285714285714,0.75,0.37142857142857144,0.7925,0.95,0.8071428571428572,0.7214285714285714,0.7142857142857143,0.7142857142857143,0.9428571428571428,0.8428571428571429,0.5571428571428572,0.7285714285714285,0.7071428571428572,0.7428571428571429,256.57893514633156,0.0
6
+ viper-purdue,0.7447368421052631,0.15526315789473685,0.05526315789473684,0.6736842105263158,0.5842105263157895,0.7578947368421053,0.18421052631578946,1.0,0.975,0.995,0.985,0.94,0.965,0.99,0.94,0.99,0.99,0.45075187969924807,0.977,0.713875939849624,0.5785714285714286,0.40714285714285714,0.30714285714285716,0.7071428571428572,0.7142857142857143,0.6428571428571429,0.5857142857142857,0.007142857142857143,0.977,0.6071428571428571,0.29285714285714287,0.5285714285714286,0.36428571428571427,0.32142857142857145,0.4642857142857143,0.35,0.37142857142857144,0.5071428571428571,0.30714285714285716,0.5,454.11514687538124,0.0
7
+ JAIST-HIS,0.46842105263157896,0.9473684210526315,0.9421052631578948,0.4394736842105263,0.6210526315789474,0.9842105263157894,0.3105263157894737,0.645,0.7,0.98,0.645,0.61,0.99,0.12,0.845,0.78,0.93,0.6733082706766919,0.7245,0.6989041353383459,0.6285714285714286,0.7428571428571429,0.9714285714285714,0.6071428571428571,0.5642857142857143,0.5071428571428571,0.5428571428571428,0.4714285714285714,0.7245,0.5714285714285714,0.5571428571428572,1.0,0.55,0.5285714285714286,1.0,0.9785714285714285,0.5071428571428571,0.9857142857142858,0.5285714285714286,0.55,602.246269226074,0.0
8
+ csun22,0.4842105263157895,0.618421052631579,0.7631578947368421,0.4710526315789474,0.3763157894736842,0.32105263157894737,0.47368421052631576,0.59,0.995,0.92,0.535,0.755,0.61,0.405,0.485,0.53,0.4,0.5011278195488722,0.6225,0.5618139097744361,0.8214285714285714,0.32857142857142857,0.02857142857142857,0.9785714285714285,0.9357142857142857,0.5214285714285715,0.17857142857142858,0.9428571428571428,0.6225,0.5642857142857143,0.9071428571428571,0.8285714285714286,0.21428571428571427,0.17142857142857143,0.4,0.2,0.40714285714285714,0.8142857142857143,0.17142857142857143,0.10714285714285714,112.02455472946144,0.0
9
+ DMF,0.95,0.8842105263157894,0.9578947368421052,0.9868421052631579,0.8710526315789474,0.9736842105263158,0.9921052631578947,0.26,0.0,0.005,0.05,0.45,0.08,0.01,0.225,0.265,0.05,0.9451127819548872,0.1395,0.5423063909774436,0.9714285714285714,0.9571428571428572,0.9357142857142857,0.9785714285714285,0.9785714285714285,0.9571428571428572,0.95,0.95,0.1395,0.9857142857142858,0.9,0.9857142857142858,0.9428571428571428,0.9428571428571428,0.9571428571428572,0.9928571428571429,0.7142857142857143,0.9785714285714285,0.9428571428571428,0.9357142857142857,94.64070606231667,0.0
10
+ safe-test,0.49736842105263157,0.4710526315789474,0.4789473684210526,0.4842105263157895,0.5236842105263158,0.5026315789473684,0.49473684210526314,0.5,0.455,0.495,0.495,0.505,0.515,0.505,0.535,0.51,0.51,0.4932330827067669,0.5025,0.49786654135338343,0.4142857142857143,0.6142857142857143,0.45714285714285713,0.5,0.4642857142857143,0.4714285714285714,0.5428571428571428,0.55,0.5025,0.4928571428571429,0.5571428571428572,0.5285714285714286,0.4785714285714286,0.4642857142857143,0.4642857142857143,0.5214285714285715,0.45,0.4785714285714286,0.44285714285714284,0.4785714285714286,200.45346903800942,0.0
11
+ gylin,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1,1.0
task3.csv CHANGED
@@ -1,10 +1,10 @@
1
- team,generated_g_02,generated_g_04,generated_g_05,generated_g_06,generated_g_09,generated_g_10,generated_g_11,pristine_p_00,pristine_p_01,pristine_p_02,pristine_p_05,pristine_p_09,pristine_p_10,pristine_p_11,pristine_p_16,pristine_p_18,pristine_p_20,generated_accuracy,pristine_accuracy,balanced_accuracy,total_time,fail_rate
2
- baseline-1,0.42,0.795,0.48,0.71,0.815,0.725,0.43,0.96,0.355,0.955,0.815,1.0,0.69,0.29,0.63,0.885,0.955,0.625,0.7535000000000001,0.68925,1910.4597923755646,0.0035294117647058825
3
- ISPL,0.16,0.63,0.195,0.745,0.295,0.23,0.625,1.0,0.51,0.985,0.845,0.995,0.77,1.0,0.755,0.965,0.955,0.4114285714285714,0.8779999999999999,0.6447142857142857,82.67647147178633,0.0
4
- JAIST-HIS,0.315,0.885,0.78,0.225,0.615,0.9,0.085,0.645,0.7,0.98,0.645,0.61,0.99,0.12,0.845,0.78,0.93,0.5435714285714286,0.7245,0.6340357142857143,455.8254814147948,0.0
5
- baseline-2,0.205,0.475,0.33,0.48,0.36,0.685,0.24,0.9,0.66,0.895,0.915,0.935,0.68,0.815,0.85,0.88,0.945,0.39642857142857146,0.8474999999999999,0.6219642857142857,1165.3817403316498,0.01088235294117647
6
- ya-jiang,0.285,0.18,0.095,0.375,0.195,0.585,0.115,1.0,0.87,0.965,0.92,0.98,0.885,0.995,0.975,0.975,1.0,0.2614285714285714,0.9564999999999999,0.6089642857142856,187.8163781166075,0.0
7
- viper-purdue,0.71,0.67,0.565,0.54,0.64,0.6,0.4,0.7,0.805,0.785,0.555,0.415,0.655,0.635,0.205,0.585,0.39,0.5892857142857143,0.573,0.5811428571428572,329.811629295349,0.0
8
- safe-test,0.515,0.525,0.535,0.545,0.455,0.52,0.51,0.495,0.5,0.51,0.48,0.52,0.52,0.5,0.495,0.43,0.5,0.515,0.49499999999999994,0.505,145.18649339675886,0.0
9
- yyxb,0.73,0.775,0.745,0.98,0.795,0.84,1.0,0.24,0.0,0.0,0.04,0.405,0.075,0.0,0.21,0.225,0.04,0.8378571428571429,0.12350000000000001,0.48067857142857146,71.99708724021895,0.0
10
- csun22,0.21,0.575,0.52,0.24,0.19,0.235,0.315,0.59,0.995,0.92,0.535,0.755,0.61,0.405,0.485,0.53,0.4,0.3264285714285714,0.6225,0.4744642857142857,84.21037721633896,0.0
 
1
+ team,generated_g_02,generated_g_04,generated_g_05,generated_g_06,generated_g_09,generated_g_10,generated_g_11,pristine_p_00,pristine_p_01,pristine_p_02,pristine_p_05,pristine_p_09,pristine_p_10,pristine_p_11,pristine_p_16,pristine_p_18,pristine_p_20,generated_accuracy,pristine_accuracy,balanced_accuracy,aug_a_00,aug_a_01,aug_a_02,aug_a_03,aug_a_04,total_time,fail_rate
2
+ baseline-1,0.42,0.795,0.48,0.71,0.815,0.725,0.43,0.96,0.355,0.955,0.815,1.0,0.69,0.29,0.63,0.885,0.955,0.625,0.7535000000000001,0.68925,0.7428571428571429,0.7535,0.6,0.3457142857142857,0.8114285714285714,1910.4597923755646,0.0035294117647058825
3
+ ISPL,0.16,0.63,0.195,0.745,0.295,0.23,0.625,1.0,0.51,0.985,0.845,0.995,0.77,1.0,0.755,0.965,0.955,0.4114285714285714,0.8779999999999999,0.6447142857142857,0.3485714285714286,0.878,0.37714285714285717,0.30857142857142855,0.6114285714285714,82.67647147178633,0.0
4
+ JAIST-HIS,0.315,0.885,0.78,0.225,0.615,0.9,0.085,0.645,0.7,0.98,0.645,0.61,0.99,0.12,0.845,0.78,0.93,0.5435714285714286,0.7245,0.6340357142857143,0.4857142857142857,0.7245,0.6285714285714286,0.7057142857142857,0.35428571428571426,455.8254814147948,0.0
5
+ baseline-2,0.205,0.475,0.33,0.48,0.36,0.685,0.24,0.9,0.66,0.895,0.915,0.935,0.68,0.815,0.85,0.88,0.945,0.39642857142857146,0.8474999999999999,0.6219642857142857,0.35428571428571426,0.8475,0.37714285714285717,0.12,0.7342857142857143,1165.3817403316498,0.01088235294117647
6
+ ya-jiang,0.285,0.18,0.095,0.375,0.195,0.585,0.115,1.0,0.87,0.965,0.92,0.98,0.885,0.995,0.975,0.975,1.0,0.2614285714285714,0.9564999999999999,0.6089642857142856,0.14,0.9565,0.27714285714285714,0.02857142857142857,0.6,187.8163781166075,0.0
7
+ viper-purdue,0.71,0.67,0.565,0.54,0.64,0.6,0.4,0.7,0.805,0.785,0.555,0.415,0.655,0.635,0.205,0.585,0.39,0.5892857142857143,0.573,0.5811428571428572,0.4657142857142857,0.573,0.5657142857142857,0.8971428571428571,0.42857142857142855,329.811629295349,0.0
8
+ safe-test,0.515,0.525,0.535,0.545,0.455,0.52,0.51,0.495,0.5,0.51,0.48,0.52,0.52,0.5,0.495,0.43,0.5,0.515,0.49499999999999994,0.505,0.54,0.495,0.5314285714285715,0.47714285714285715,0.5114285714285715,145.18649339675886,0.0
9
+ yyxb,0.73,0.775,0.745,0.98,0.795,0.84,1.0,0.24,0.0,0.0,0.04,0.405,0.075,0.0,0.21,0.225,0.04,0.8378571428571429,0.12350000000000001,0.48067857142857146,0.9542857142857143,0.1235,0.9057142857142857,0.52,0.9714285714285714,71.99708724021895,0.0
10
+ csun22,0.21,0.575,0.52,0.24,0.19,0.235,0.315,0.59,0.995,0.92,0.535,0.755,0.61,0.405,0.485,0.53,0.4,0.3264285714285714,0.6225,0.4744642857142857,0.6171428571428571,0.6225,0.03428571428571429,0.26571428571428574,0.38857142857142857,84.21037721633896,0.0
updated.txt CHANGED
@@ -1 +1 @@
1
- Updated on 2025-04-23 12:56:53 EST
 
1
+ Updated on 2025-04-23 16:06:57 EST