davidpomerenke committed on
Commit
1e8952a
·
verified ·
1 Parent(s): 6f68367

Upload from GitHub Actions: Display N/A scores as such

Browse files
.github/workflows/nightly-evals.yml CHANGED
@@ -39,7 +39,7 @@ jobs:
39
  git config --local --unset-all http.https://github.com/.extraheader
40
  git remote set-url origin https://${GH_PAT}@github.com/datenlabor-bmz/ai-language-monitor.git
41
  git add results.json models.json languages.json
42
- git commit -m "Update evaluation results [skip ci]" || echo "No changes to commit"
43
  git push origin HEAD:main
44
 
45
  - name: Upload to Hugging Face
 
39
  git config --local --unset-all http.https://github.com/.extraheader
40
  git remote set-url origin https://${GH_PAT}@github.com/datenlabor-bmz/ai-language-monitor.git
41
  git add results.json models.json languages.json
42
+ git commit -m "Update evaluation results" || echo "No changes to commit"
43
  git push origin HEAD:main
44
 
45
  - name: Upload to Hugging Face
evals/backend.py CHANGED
@@ -31,8 +31,8 @@ def make_model_table(df, models):
31
  )
32
  df["task_metric"] = df["task"] + "_" + df["metric"]
33
  df = df.drop(columns=["task", "metric"])
34
- df = df.pivot(index="model", columns="task_metric", values="score").fillna(0)
35
- df["average"] = df[task_metrics].mean(axis=1)
36
  df = df.sort_values(by="average", ascending=False).reset_index()
37
  df = pd.merge(df, models, left_on="model", right_on="id", how="left")
38
  df["rank"] = df.index + 1
@@ -65,10 +65,9 @@ def make_language_table(df, languages):
65
  df = df.drop(columns=["task", "metric"])
66
  df = (
67
  df.pivot(index="bcp_47", columns="task_metric", values="score")
68
- .fillna(0)
69
  .reset_index()
70
  )
71
- df["average"] = df[task_metrics].mean(axis=1)
72
  df = pd.merge(languages, df, on="bcp_47", how="outer")
73
  df = df.sort_values(by="speakers", ascending=False)
74
  df = df[
 
31
  )
32
  df["task_metric"] = df["task"] + "_" + df["metric"]
33
  df = df.drop(columns=["task", "metric"])
34
+ df = df.pivot(index="model", columns="task_metric", values="score")
35
+ df["average"] = df[task_metrics].mean(axis=1, skipna=False)
36
  df = df.sort_values(by="average", ascending=False).reset_index()
37
  df = pd.merge(df, models, left_on="model", right_on="id", how="left")
38
  df["rank"] = df.index + 1
 
65
  df = df.drop(columns=["task", "metric"])
66
  df = (
67
  df.pivot(index="bcp_47", columns="task_metric", values="score")
 
68
  .reset_index()
69
  )
70
+ df["average"] = df[task_metrics].mean(axis=1, skipna=False)
71
  df = pd.merge(languages, df, on="bcp_47", how="outer")
72
  df = df.sort_values(by="speakers", ascending=False)
73
  df = df[
frontend/src/components/HistoryPlot.js CHANGED
@@ -4,6 +4,7 @@ import * as Plot from '@observablehq/plot'
4
  const HistoryPlot = ({ data }) => {
5
  const containerRef = useRef()
6
  const models = [...data.model_table] // sort copy, not in place
 
7
  .sort((a, b) => new Date(a.creation_date) - new Date(b.creation_date))
8
  .reduce((acc, curr) => {
9
  const last = acc[acc.length - 1]?.maxAverage || 0
 
4
  const HistoryPlot = ({ data }) => {
5
  const containerRef = useRef()
6
  const models = [...data.model_table] // sort copy, not in place
7
+ .filter(d => d.average !== null)
8
  .sort((a, b) => new Date(a.creation_date) - new Date(b.creation_date))
9
  .reduce((acc, curr) => {
10
  const last = acc[acc.length - 1]?.maxAverage || 0
frontend/src/components/ScoreField.js CHANGED
@@ -1,8 +1,11 @@
1
  const ScoreField = (score, minScore, maxScore) => {
 
 
 
2
  // Calculate percentage based on the provided min and max scores
3
  // This normalizes the score to a 0-100 range for visualization
4
  const normalizedScore = Math.min(Math.max(score, minScore), maxScore)
5
- const percentage =
6
  ((normalizedScore - minScore) / (maxScore - minScore)) * 100
7
 
8
  // Continuous color gradient from red to green based on score
@@ -15,7 +18,8 @@ const ScoreField = (score, minScore, maxScore) => {
15
  // Use a low opacity for subtlety (0.1-0.2 range)
16
  const opacity = 0.1 + (percentage / 100) * 0.1
17
 
18
- const barColor = `rgba(${red}, ${green}, 0, ${opacity.toFixed(2)})`
 
19
 
20
  return (
21
  <div
@@ -45,7 +49,7 @@ const ScoreField = (score, minScore, maxScore) => {
45
  zIndex: 1
46
  }}
47
  >
48
- {(score * 100).toFixed(1)}%
49
  </span>
50
  </div>
51
  )
 
1
  const ScoreField = (score, minScore, maxScore) => {
2
+ let percentage = 100
3
+ let barColor = "rgba(210, 106, 255, 0.1)" // light violet for missing data
4
+ if (score !== null) {
5
  // Calculate percentage based on the provided min and max scores
6
  // This normalizes the score to a 0-100 range for visualization
7
  const normalizedScore = Math.min(Math.max(score, minScore), maxScore)
8
+ percentage =
9
  ((normalizedScore - minScore) / (maxScore - minScore)) * 100
10
 
11
  // Continuous color gradient from red to green based on score
 
18
  // Use a low opacity for subtlety (0.1-0.2 range)
19
  const opacity = 0.1 + (percentage / 100) * 0.1
20
 
21
+ barColor = `rgba(${red}, ${green}, 0, ${opacity.toFixed(2)})`
22
+ }
23
 
24
  return (
25
  <div
 
49
  zIndex: 1
50
  }}
51
  >
52
+ {score !== null ? (score * 100).toFixed(1)+"%" : '–'}
53
  </span>
54
  </div>
55
  )