Small changes
app.py (changed)
@@ -48,7 +48,7 @@ def mean_of_max_per_field(df):
 
 def boxplot_per_task(dataframe=None, baselines=None):
 
-    print(dataframe.columns)
+    #print(dataframe.columns)
 
     tasks = ["TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]
 
@@ -76,7 +76,6 @@ def boxplot_per_task(dataframe=None, baselines=None):
             y=y_data,
             name=task,
             marker=dict(color=colors[i]),
-            # Change: set the box outline to a different color from the fill
             line=dict(color="black", width=2),
             fillcolor=colors[i],
             opacity=0.7,
@@ -105,8 +104,8 @@ def boxplot_per_task(dataframe=None, baselines=None):
 
     fig.update_layout(
         title="Distribution of Model Accuracy by Task",
-        xaxis_title="Task",
-        yaxis_title="
+        #xaxis_title="Task",
+        yaxis_title="Avg. Combined Performance ⬆️",
         template="plotly_white",
         boxmode="group",
         dragmode=False,
@@ -119,7 +118,7 @@ def boxplot_per_task(dataframe=None, baselines=None):
             "indicate best-performing supervised models evaluated on EVALITA."
         ),
         xref="paper", yref="paper",
-        x=0.5, y=-0.
+        x=0.5, y=-0.30,
         showarrow=False,
         font=dict(size=12, color="gray")
     )
@@ -130,7 +129,6 @@ def boxplot_per_task(dataframe=None, baselines=None):
     return fig
 
 
-# 🔹 Example usage
 BASELINES = {
     "TE":71.00, "SA": 66.38, "HS": 80.88, "AT": 82.40, "WIC": 85.00,
     "LS": 38.82, "SU": 38.91, "NER":88.00, "REL": 62.99
@@ -187,16 +185,16 @@ def boxplot_prompts_per_task(dataframe, tasks=None):
     for x, y, text in zip(best_x, best_y, best_text):
         fig.add_annotation(
             x=x,
-            y=y +
+            y=y + 3, # slightly above the bar
             text=text,
             showarrow=False,
             font=dict(size=12, color="black")
         )
 
     fig.update_layout(
-        title="
+        title="Average Prompt Accuracy vs Best Prompt Accuracy per Task",
         xaxis_title="Task",
-        yaxis_title="
+        yaxis_title="Avg. Combined Performance ⬆️",
         barmode='group',
         template="plotly_white",
         font=dict(family="Arial", size=13),