|
""" |
|
A function to parse lm_eval text output into JSON format.
|
""" |
|
import ast
import json
import os
|
|
|
|
|
# Header keys that always stay plain text: a dtype such as ``float`` or a
# model name must never be interpreted as a Python literal.
_TEXT_ONLY_KEYS = ('dtype', 'pretrained')


def _literal_or_text(text):
    """Return ``text`` parsed as a Python literal, or ``text`` itself otherwise."""
    # ast.literal_eval only accepts literals, so file content can never
    # execute code or resolve to an object that happens to be in scope.
    try:
        return ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return text


def _float_or_text(cell):
    """Return ``cell`` as a float, or unchanged when it is not numeric (e.g. ``N/A``)."""
    try:
        return float(cell)
    except ValueError:
        return cell


def _capitalize_ends(name):
    """Upper-case the first and the last character of ``name``."""
    if len(name) < 2:
        # Avoids doubling a single character and indexing an empty string.
        return name.upper()
    return name[0].upper() + name[1:-1] + name[-1].upper()


def _parse_header(header_line):
    """Turn the text following ``hf (`` on the header line into a config dict."""
    config = {}
    head, found, batch_size = header_line.partition('batch_size: ')
    if found:
        # Kept as text, exactly as it appears in the file.
        config['batch_size'] = batch_size

    for entry in head.split(','):
        entry = entry.strip()
        if not entry:
            continue

        # Use whichever separator comes first so that an entry such as
        # ``gen_kwargs: (temperature=0`` is keyed on ``gen_kwargs``.
        eq_at = entry.find('=')
        colon_at = entry.find(': ')
        if eq_at < 0 and colon_at < 0:
            raise ValueError(f'Cannot parse header entry: {entry!r}')
        use_colon = colon_at >= 0 and (eq_at < 0 or colon_at < eq_at)
        key, _, value = entry.partition(': ' if use_colon else '=')

        # Drop the parentheses that wrap argument groups in the header.
        if value.endswith(')'):
            value = value[:-1]
        if value.startswith('('):
            value = value[1:]

        if key not in _TEXT_ONLY_KEYS:
            value = _literal_or_text(value)
        if key == 'pretrained':
            # Keep only the model name, without organisation or directories.
            value = value.rstrip('/').split('/')[-1]
        config[key] = value
    return config


def _parse_results(lines):
    """Collect the metric rows of the result tables into a per-task dict."""
    results = {}
    task = ''
    alias = ''
    for line in lines:
        if not line.startswith('|'):
            continue
        columns = line.split('|')
        label = columns[1].strip()
        # Skip the table header rows and the ``---`` separator row.
        if label in ('Tasks', 'Groups') or '--' in label:
            continue
        # 11 cells: current layout with a direction column before the value;
        # 10 cells: older layout without it.
        if len(columns) not in (10, 11):
            raise ValueError(f'Unexpected table row: {line!r}')

        version, filter_name, nshot, metric = (
            cell.strip() for cell in columns[2:6]
        )
        # Counted from the right so both layouts are handled alike.
        value = columns[-4].strip()
        stderr = columns[-2].strip()

        # Nested tasks are shown as "- name"; rows with an empty first cell
        # carry further metrics of the task above them.
        name = label.split('- ')[-1]
        if name:
            task = name
            alias = label
            results[task] = {}
        elif task not in results:
            raise ValueError(f'Metric row appears before any task: {line!r}')
        entry = results[task]

        if version:
            try:
                entry['version'] = float(version)
            except ValueError as exc:
                print(exc)
        if nshot:
            entry['nshot'] = int(nshot)

        entry['alias'] = alias
        entry[f'{metric},{filter_name}'] = _float_or_text(value)
        # One standard error per metric, so that later rows of the same task
        # do not overwrite earlier ones.
        entry[f'{metric}_stderr,{filter_name}'] = _float_or_text(stderr)
        # Legacy key: always holds the standard error of the last row seen.
        entry['stderr'] = _float_or_text(stderr)
    return results


def txt2json(file):
    """Convert an lm_eval text log into JSON files.

    The text is split on ``hf (``; every piece after the first is treated as
    one evaluation consisting of a header line and result tables. For each
    evaluation a file named ``<Model>_<lang>.json`` is written to the current
    working directory, where ``<Model>`` is the ``pretrained`` name with its
    first and last characters upper-cased and ``<lang>`` is the part of the
    input file name after the last underscore, up to ``.txt``. Evaluations
    that share a model name overwrite each other's output file.

    Args:
        file: Path of the text file to convert.

    Raises:
        ValueError: If a header entry or table row cannot be parsed, or an
            evaluation header has no ``pretrained`` entry.
    """
    # Use the file name only, so underscores in directory names are ignored.
    lang = os.path.basename(file).split('_')[-1].split('.txt')[0]

    # The tables contain non-ASCII symbols; undecodable bytes are replaced
    # rather than aborting the conversion.
    with open(file, encoding='utf-8', errors='replace') as fh:
        data = fh.read().split('hf (')[1:]
    print(len(data))

    for evaluation in data:
        lines = evaluation.split('\n')
        metadata = _parse_header(lines[0])
        print(metadata)
        results = _parse_results(lines[1:])

        output = {'config': metadata, "results": results}
        print(output)

        pretrained = metadata.get('pretrained')
        if not pretrained:
            raise ValueError(
                'Evaluation header has no usable "pretrained" entry'
            )
        target = f'{_capitalize_ends(pretrained)}_{lang}.json'
        with open(target, 'w', encoding='utf-8') as f:
            json.dump(output, f, ensure_ascii=False, indent=2)