Upload from GitHub Actions: Display N/A scores as such
Browse files
.github/workflows/nightly-evals.yml
CHANGED
@@ -39,7 +39,7 @@ jobs:
|
|
39 |
git config --local --unset-all http.https://github.com/.extraheader
|
40 |
git remote set-url origin https://${GH_PAT}@github.com/datenlabor-bmz/ai-language-monitor.git
|
41 |
git add results.json models.json languages.json
|
42 |
-
git commit -m "Update evaluation results"
|
43 |
git push origin HEAD:main
|
44 |
|
45 |
- name: Upload to Hugging Face
|
|
|
39 |
git config --local --unset-all http.https://github.com/.extraheader
|
40 |
git remote set-url origin https://${GH_PAT}@github.com/datenlabor-bmz/ai-language-monitor.git
|
41 |
git add results.json models.json languages.json
|
42 |
+
git commit -m "Update evaluation results" || echo "No changes to commit"
|
43 |
git push origin HEAD:main
|
44 |
|
45 |
- name: Upload to Hugging Face
|
evals/backend.py
CHANGED
@@ -31,8 +31,8 @@ def make_model_table(df, models):
|
|
31 |
)
|
32 |
df["task_metric"] = df["task"] + "_" + df["metric"]
|
33 |
df = df.drop(columns=["task", "metric"])
|
34 |
-
df = df.pivot(index="model", columns="task_metric", values="score")
|
35 |
-
df["average"] = df[task_metrics].mean(axis=1)
|
36 |
df = df.sort_values(by="average", ascending=False).reset_index()
|
37 |
df = pd.merge(df, models, left_on="model", right_on="id", how="left")
|
38 |
df["rank"] = df.index + 1
|
@@ -65,10 +65,9 @@ def make_language_table(df, languages):
|
|
65 |
df = df.drop(columns=["task", "metric"])
|
66 |
df = (
|
67 |
df.pivot(index="bcp_47", columns="task_metric", values="score")
|
68 |
-
.fillna(0)
|
69 |
.reset_index()
|
70 |
)
|
71 |
-
df["average"] = df[task_metrics].mean(axis=1)
|
72 |
df = pd.merge(languages, df, on="bcp_47", how="outer")
|
73 |
df = df.sort_values(by="speakers", ascending=False)
|
74 |
df = df[
|
|
|
31 |
)
|
32 |
df["task_metric"] = df["task"] + "_" + df["metric"]
|
33 |
df = df.drop(columns=["task", "metric"])
|
34 |
+
df = df.pivot(index="model", columns="task_metric", values="score")
|
35 |
+
df["average"] = df[task_metrics].mean(axis=1, skipna=False)
|
36 |
df = df.sort_values(by="average", ascending=False).reset_index()
|
37 |
df = pd.merge(df, models, left_on="model", right_on="id", how="left")
|
38 |
df["rank"] = df.index + 1
|
|
|
65 |
df = df.drop(columns=["task", "metric"])
|
66 |
df = (
|
67 |
df.pivot(index="bcp_47", columns="task_metric", values="score")
|
|
|
68 |
.reset_index()
|
69 |
)
|
70 |
+
df["average"] = df[task_metrics].mean(axis=1, skipna=False)
|
71 |
df = pd.merge(languages, df, on="bcp_47", how="outer")
|
72 |
df = df.sort_values(by="speakers", ascending=False)
|
73 |
df = df[
|
frontend/src/components/HistoryPlot.js
CHANGED
@@ -4,6 +4,7 @@ import * as Plot from '@observablehq/plot'
|
|
4 |
const HistoryPlot = ({ data }) => {
|
5 |
const containerRef = useRef()
|
6 |
const models = [...data.model_table] // sort copy, not in place
|
|
|
7 |
.sort((a, b) => new Date(a.creation_date) - new Date(b.creation_date))
|
8 |
.reduce((acc, curr) => {
|
9 |
const last = acc[acc.length - 1]?.maxAverage || 0
|
|
|
4 |
const HistoryPlot = ({ data }) => {
|
5 |
const containerRef = useRef()
|
6 |
const models = [...data.model_table] // sort copy, not in place
|
7 |
+
.filter(d => d.average !== null)
|
8 |
.sort((a, b) => new Date(a.creation_date) - new Date(b.creation_date))
|
9 |
.reduce((acc, curr) => {
|
10 |
const last = acc[acc.length - 1]?.maxAverage || 0
|
frontend/src/components/ScoreField.js
CHANGED
@@ -1,8 +1,11 @@
|
|
1 |
const ScoreField = (score, minScore, maxScore) => {
|
|
|
|
|
|
|
2 |
// Calculate percentage based on the provided min and max scores
|
3 |
// This normalizes the score to a 0-100 range for visualization
|
4 |
const normalizedScore = Math.min(Math.max(score, minScore), maxScore)
|
5 |
-
|
6 |
((normalizedScore - minScore) / (maxScore - minScore)) * 100
|
7 |
|
8 |
// Continuous color gradient from red to green based on score
|
@@ -15,7 +18,8 @@ const ScoreField = (score, minScore, maxScore) => {
|
|
15 |
// Use a low opacity for subtlety (0.1-0.2 range)
|
16 |
const opacity = 0.1 + (percentage / 100) * 0.1
|
17 |
|
18 |
-
|
|
|
19 |
|
20 |
return (
|
21 |
<div
|
@@ -45,7 +49,7 @@ const ScoreField = (score, minScore, maxScore) => {
|
|
45 |
zIndex: 1
|
46 |
}}
|
47 |
>
|
48 |
-
{(score * 100).toFixed(1)}
|
49 |
</span>
|
50 |
</div>
|
51 |
)
|
|
|
1 |
const ScoreField = (score, minScore, maxScore) => {
|
2 |
+
let percentage = 100
|
3 |
+
let barColor = "rgba(210, 106, 255, 0.1)" // light violet for missing data
|
4 |
+
if (score !== null) {
|
5 |
// Calculate percentage based on the provided min and max scores
|
6 |
// This normalizes the score to a 0-100 range for visualization
|
7 |
const normalizedScore = Math.min(Math.max(score, minScore), maxScore)
|
8 |
+
percentage =
|
9 |
((normalizedScore - minScore) / (maxScore - minScore)) * 100
|
10 |
|
11 |
// Continuous color gradient from red to green based on score
|
|
|
18 |
// Use a low opacity for subtlety (0.1-0.2 range)
|
19 |
const opacity = 0.1 + (percentage / 100) * 0.1
|
20 |
|
21 |
+
barColor = `rgba(${red}, ${green}, 0, ${opacity.toFixed(2)})`
|
22 |
+
}
|
23 |
|
24 |
return (
|
25 |
<div
|
|
|
49 |
zIndex: 1
|
50 |
}}
|
51 |
>
|
52 |
+
{score !== null ? (score * 100).toFixed(1)+"%" : '–'}
|
53 |
</span>
|
54 |
</div>
|
55 |
)
|