Small changes
app.py (changed)
@@ -48,7 +48,7 @@ def mean_of_max_per_field(df):
 
 def boxplot_per_task(dataframe=None, baselines=None):
 
-    print(dataframe.columns)
+    #print(dataframe.columns)
 
     tasks = ["TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]
 
@@ -76,7 +76,6 @@ def boxplot_per_task(dataframe=None, baselines=None):
             y=y_data,
             name=task,
             marker=dict(color=colors[i]),
-            # Change: set the box outline to a different color from the fill
             line=dict(color="black", width=2),
             fillcolor=colors[i],
             opacity=0.7,
@@ -105,8 +104,8 @@ def boxplot_per_task(dataframe=None, baselines=None):
 
     fig.update_layout(
         title="Distribution of Model Accuracy by Task",
-        xaxis_title="Task",
-        yaxis_title="
+        #xaxis_title="Task",
+        yaxis_title="Avg. Combined Performance ⬆️",
         template="plotly_white",
         boxmode="group",
         dragmode=False,
@@ -119,7 +118,7 @@ def boxplot_per_task(dataframe=None, baselines=None):
             "indicate best-performing supervised models evaluated on EVALITA."
         ),
         xref="paper", yref="paper",
-        x=0.5, y=-0.
+        x=0.5, y=-0.30,
         showarrow=False,
         font=dict(size=12, color="gray")
     )
@@ -130,7 +129,6 @@ def boxplot_per_task(dataframe=None, baselines=None):
     return fig
 
 
-# 🔹 Example usage
 BASELINES = {
     "TE":71.00, "SA": 66.38, "HS": 80.88, "AT": 82.40, "WIC": 85.00,
     "LS": 38.82, "SU": 38.91, "NER":88.00, "REL": 62.99
@@ -187,16 +185,16 @@ def boxplot_prompts_per_task(dataframe, tasks=None):
     for x, y, text in zip(best_x, best_y, best_text):
         fig.add_annotation(
             x=x,
-            y=y +
+            y=y + 3, # slightly above the bar
             text=text,
             showarrow=False,
             font=dict(size=12, color="black")
         )
 
     fig.update_layout(
-        title="
+        title="Average Prompt Accuracy vs Best Prompt Accuracy per Task",
         xaxis_title="Task",
-        yaxis_title="
+        yaxis_title="Avg. Combined Performance ⬆️",
         barmode='group',
         template="plotly_white",
         font=dict(family="Arial", size=13),