# sonsus's picture
# new feat: o4-mini supported
# 9e784df
import asyncio
import random
from itertools import combinations
from time import time
from types import SimpleNamespace
from typing import Dict, List, Literal, Tuple
from varco_arena_core.prompts import ComparisonPromptBase
from .match import Match
async def limited_coro(coro, semaphore):
async with semaphore:
return await coro
class League:
    """
    Full grid matches of the model outputs (all-play-all).

    Every unordered pair of participants is evaluated once and the resulting
    match records are returned as one flat list.
    """

    def __init__(self, participants, evaluation_model):
        """Store the contestants and the judge used for every match.

        Args:
            participants: Iterable of entries to compare. When a match record
                is built, ``task``, ``instruction`` and ``source`` are read
                from the first entry of the pair, and ``model_id`` and
                ``generated`` from both.
            evaluation_model: Identifier of the judge model. The value
                ``"debug"`` selects ``Match.async_dbg_eval`` instead of the
                prompt-based comparison.
        """
        self.participants = participants
        self.evaluation_model = evaluation_model

    async def _play_match(self, a, b, prompt_obj):
        """Run one match between ``a`` and ``b`` and build its record.

        Returns:
            A one-element list holding the match record, or ``None`` when the
            evaluation produced no winner.
        """
        match = Match(A=a, B=b, eval_model=self.evaluation_model)
        if self.evaluation_model == "debug":
            winner, match_result = await match.async_dbg_eval()
        else:
            winner, match_result = await match.async_comp_eval(
                comp_prompt=prompt_obj,
            )

        if winner is None:
            return None

        now_time = time()
        verdict = match_result[0]
        # NOTE: the *_a / *_b suffixes refer to the position inside the
        # prompt, not to Match.A / Match.B.
        return [
            {
                "task": a.task,
                "model_a": a.model_id,
                "model_b": b.model_id,
                # Ties (equal scores) are recorded as a win for "B".
                "winner": "A" if verdict["A"] > verdict["B"] else "B",
                "prob_a": verdict["A"],
                "prob_b": verdict["B"],
                "evaluation_model": self.evaluation_model,
                "instruction": a.instruction,
                "source": a.source,
                "generated_a": a.generated,
                "generated_b": b.generated,
                "round": "league",
                "match_order_in_round": "league",
                "tstamp": now_time,
                "api_call_kwargs": verdict["api_call_kwargs"],
                "actual_response_text": verdict["actual_response_text"],
            },
        ]

    async def async_run(
        self,
        prompt_obj: ComparisonPromptBase = None,
        semaphore: asyncio.Semaphore = None,
    ) -> List[Dict]:
        """Play every pairing concurrently and collect the match records.

        Args:
            prompt_obj: Comparison prompt forwarded to
                ``Match.async_comp_eval`` (unused in ``"debug"`` mode).
            semaphore: Optional limiter on the number of matches in flight.
                ``None`` (the default) runs all matches without a limit.

        Returns:
            Flat list of match-record dicts; matches without a winner are
            omitted. Only the records are returned, not the winning
            participants.
        """
        match_jobs = []
        for first, second in combinations(self.participants, 2):
            job = self._play_match(first, second, prompt_obj)
            # Only throttle when a semaphore was actually supplied; the
            # default of None must not be passed into ``async with``.
            if semaphore is not None:
                job = limited_coro(job, semaphore)
            match_jobs.append(job)

        outcomes = await asyncio.gather(*match_jobs)
        return [
            record
            for records in outcomes
            if records is not None
            for record in records
        ]