davidpomerenke committed on
Commit
c9e9db6
·
verified ·
1 Parent(s): 3409596

Upload from GitHub Actions: Exclude free models from evals

Browse files
Files changed (2) hide show
  1. evals/main.py +1 -1
  2. evals/models.py +18 -16
evals/main.py CHANGED
@@ -20,7 +20,7 @@ async def evaluate():
20
  print("running evaluations")
21
  old_results = pd.read_json("results.json")
22
  results = [
23
- task(model, lang.bcp_47, i)
24
  for task_name, task in tasks.items()
25
  for i in range(n_sentences)
26
  for lang in languages.iloc[:n_languages].itertuples()
 
20
  print("running evaluations")
21
  old_results = pd.read_json("results.json")
22
  results = [
23
+ task(task, model, lang, i)
24
  for task_name, task in tasks.items()
25
  for i in range(n_sentences)
26
  for lang in languages.iloc[:n_languages].itertuples()
evals/models.py CHANGED
@@ -45,7 +45,7 @@ models = [
45
  ]
46
 
47
  blocklist = [
48
- "google/gemini-2.5-pro-exp-03-25" # rate limit too low
49
  ]
50
 
51
  transcription_models = [
@@ -62,9 +62,10 @@ cache = Memory(location=".cache", verbose=0).cache
62
  def get_models(date: date):
63
  return get("https://openrouter.ai/api/frontend/models").json()["data"]
64
 
65
- def get_slug(permaslug):
 
66
  models = get_models(date.today())
67
- slugs = [m["slug"] for m in models if m["permaslug"] == permaslug]
68
  return slugs[0] if len(slugs) == 1 else None
69
 
70
 
@@ -80,7 +81,7 @@ def get_historical_popular_models(date: date):
80
  continue
81
  counts[model.split(":")[0]] += count
82
  counts = sorted(counts.items(), key=lambda x: x[1], reverse=True)
83
- return [get_slug(model) for model, _ in counts]
84
 
85
 
86
  @cache
@@ -89,15 +90,21 @@ def get_current_popular_models(date: date):
89
  data = re.search(r'{\\"rankMap\\":(.*)\}\]\\n"\]\)</script>', raw).group(1)
90
  data = json.loads(data.replace("\\", ""))["day"]
91
  data = sorted(data, key=lambda x: x["total_prompt_tokens"], reverse=True)
92
- return [get_slug(model["model_permaslug"]) for model in data]
93
 
94
- models += [
95
- m for m in get_historical_popular_models(date.today()) if m and m not in models and m not in blocklist
96
- ][:5]
97
- models += [
98
- m for m in get_current_popular_models(date.today()) if m and m not in models and m not in blocklist
99
- ][:5]
100
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
  load_dotenv()
103
  client = AsyncOpenAI(
@@ -153,11 +160,6 @@ async def transcribe(path, model="elevenlabs/scribe_v1"):
153
  models = pd.DataFrame(models, columns=["id"])
154
 
155
 
156
- @cache
157
- def get_models(date):
158
- return get("https://openrouter.ai/api/frontend/models/").json()["data"]
159
-
160
-
161
  def get_or_metadata(id):
162
  # get metadata from OpenRouter
163
  models = get_models(date.today())
 
45
  ]
46
 
47
  blocklist = [
48
+ "google/gemini-2.5-pro-exp-03-25" # rate limit too low
49
  ]
50
 
51
  transcription_models = [
 
62
  def get_models(date: date):
63
  return get("https://openrouter.ai/api/frontend/models").json()["data"]
64
 
65
+
66
+ def get_model(permaslug):
67
  models = get_models(date.today())
68
+ slugs = [m for m in models if m["permaslug"] == permaslug]
69
  return slugs[0] if len(slugs) == 1 else None
70
 
71
 
 
81
  continue
82
  counts[model.split(":")[0]] += count
83
  counts = sorted(counts.items(), key=lambda x: x[1], reverse=True)
84
+ return [get_model(model) for model, _ in counts]
85
 
86
 
87
  @cache
 
90
  data = re.search(r'{\\"rankMap\\":(.*)\}\]\\n"\]\)</script>', raw).group(1)
91
  data = json.loads(data.replace("\\", ""))["day"]
92
  data = sorted(data, key=lambda x: x["total_prompt_tokens"], reverse=True)
93
+ return [get_model(model["model_permaslug"]) for model in data]
94
 
 
 
 
 
 
 
95
 
96
+ popular_models = get_historical_popular_models(
97
+ date.today()
98
+ ) + get_current_popular_models(date.today())
99
+ popular_models = [get_model(m) for m in popular_models if get_model(m)]
100
+ popular_models = [
101
+ m for m in popular_models if m["endpoint"] and not m["endpoint"]["is_free"]
102
+ ]
103
+ popular_models = [m["slug"] for m in popular_models]
104
+ popular_models = [
105
+ m for m in popular_models if m and m not in models and m not in blocklist
106
+ ]
107
+ models += popular_models[:5]
108
 
109
  load_dotenv()
110
  client = AsyncOpenAI(
 
160
  models = pd.DataFrame(models, columns=["id"])
161
 
162
 
 
 
 
 
 
163
  def get_or_metadata(id):
164
  # get metadata from OpenRouter
165
  models = get_models(date.today())