Spaces:
Running
Running
File size: 7,693 Bytes
c9a97c2 cd01d35 c9a97c2 cd01d35 c9a97c2 cd01d35 c9a97c2 cd01d35 c9a97c2 cd01d35 c9a97c2 cd01d35 c9a97c2 cd01d35 c9a97c2 cd01d35 c9a97c2 cd01d35 c9a97c2 cd01d35 c9a97c2 cd01d35 c9a97c2 cd01d35 c9a97c2 cd01d35 c9a97c2 cd01d35 c9a97c2 cd01d35 c9a97c2 cd01d35 c9a97c2 cd01d35 c9a97c2 cd01d35 c9a97c2 cd01d35 c9a97c2 cd01d35 c9a97c2 cd01d35 c9a97c2 cd01d35 c9a97c2 cd01d35 c9a97c2 cd01d35 c9a97c2 cd01d35 c9a97c2 cd01d35 c9a97c2 cd01d35 c9a97c2 cd01d35 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 |
import pandas as pd
import json
from datetime import datetime
def process_csv_to_json():
    """Build the detailed per-dataset report from ``src/record.csv``.

    Reads the benchmark CSV, normalizes the raw column headers, groups
    rows by (Algorithm, LLM, Dataset) into a nested dict, sanity-checks
    the assembled structure, and writes it to
    ``src/detail_math_score.json``.  Placeholder cells ('-' or NaN) are
    coerced to 0 defaults so the output is always valid, NaN-free JSON.
    """
    df = pd.read_csv('src/record.csv')
    # Drop rows that are entirely empty, then map the raw CSV headers
    # onto the canonical names used in the JSON output.
    df = df.dropna(how='all')
    df = df.rename(columns={
        'dataset': 'Dataset',
        'llm': 'LLM',
        'score\n(EM)': 'Score',
        'pass rate': 'Pass rate',
        'Cost($)': 'Cost($)',
        'Eval Date': 'Eval Date',
        'framework': 'Framework',
        'X-shot': 'X-shot',
        'Nums': 'Samples',
        'All tokens': 'All tokens',
        'Total input tokens': 'Total input tokens',
        'Average input tokens': 'Average input tokens',
        'Total output tokens': 'Total output tokens',
        'Average output tokens': 'Average output tokens'
    })

    def parse_number(value):
        """Integer count that may use thousands separators; '-'/NaN -> 0."""
        if pd.isna(value) or value == '-':
            return 0
        return int(float(str(value).replace(',', '')))

    def parse_float(value):
        """Numeric cell; '-'/NaN -> 0.0.

        Fix: the original only tested for '-', so a NaN Score or
        Pass-rate cell flowed through float() and emitted NaN into the
        JSON file (not parseable by strict JSON consumers).
        """
        if pd.isna(value) or value == '-':
            return 0.0
        return float(value)

    result = {
        "time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "results": {}
    }
    llms = df['LLM'].dropna().unique()
    for algorithm in df['Algorithm'].dropna().unique():
        if not isinstance(algorithm, str):
            continue
        result['results'][algorithm] = {}
        for llm in llms:
            llm_data = df[(df['Algorithm'] == algorithm) & (df['LLM'] == llm)]
            if llm_data.empty:
                continue
            result['results'][algorithm][llm] = {
                'META': {
                    'Algorithm': str(algorithm),
                    'LLM': str(llm),
                    # First matching row's date stands in for the pair.
                    'Eval Date': str(llm_data['Eval Date'].iloc[0])
                }
            }
            for dataset in df['Dataset'].dropna().unique():
                if not isinstance(dataset, str):
                    continue
                dataset_data = llm_data[llm_data['Dataset'] == dataset]
                if dataset_data.empty:
                    continue
                data_row = dataset_data.iloc[0]
                result['results'][algorithm][llm][dataset] = {
                    'Score': round(parse_float(data_row['Score']), 2),
                    # CSV stores a percentage; emit a 0-1 fraction
                    # rounded to four decimal places.
                    'Pass rate': round(parse_float(data_row['Pass rate']) / 100, 4),
                    'Cost($)': parse_float(data_row['Cost($)']),
                    'Framework': str(data_row['Framework']) if 'Framework' in data_row and pd.notnull(data_row['Framework']) else '',
                    'X-shot': str(data_row['X-shot']) if pd.notnull(data_row['X-shot']) else '',
                    'Samples': parse_number(data_row['Samples']),
                    'All tokens': parse_number(data_row['All tokens']),
                    'Total input tokens': parse_number(data_row['Total input tokens']),
                    'Average input tokens': parse_number(data_row['Average input tokens']),
                    'Total output tokens': parse_number(data_row['Total output tokens']),
                    'Average output tokens': parse_number(data_row['Average output tokens'])
                }
    # Sanity-check the assembled structure; report (don't raise) so a
    # malformed CSV is visible on the console without aborting the run.
    required_fields = ['Score', 'Pass rate', 'Cost($)', 'Framework', 'X-shot', 'Samples', 'All tokens', 'Total input tokens', 'Average input tokens', 'Total output tokens', 'Average output tokens']
    for key, value in result['results'].items():
        for llm, datasets in value.items():
            meta = datasets.get('META', {})
            if 'LLM' not in meta or 'Eval Date' not in meta:
                print(f"Missing META fields in algorithm '{key}' for LLM '{llm}'")
            for dataset, data in datasets.items():
                if dataset == 'META':
                    continue
                missing_fields = [field for field in required_fields if field not in data]
                if missing_fields:
                    print(f"Missing fields {missing_fields} in dataset '{dataset}' for LLM '{llm}' in algorithm '{key}'")
    # Save as JSON file
    with open('src/detail_math_score.json', 'w', encoding='utf-8') as f:
        json.dump(result, f, indent=4, ensure_ascii=False)
def process_csv_to_overall_json(datasets=('gsm8k', 'AQuA', 'MATH-500')):
    """Build the overall per-algorithm report from ``src/record.csv``.

    Each (Algorithm, LLM) pair that appears in the CSV becomes one
    top-level key — the bare algorithm name for gpt-3.5-turbo (legacy
    key format), otherwise ``'<algorithm>-<llm>'`` — holding Score and
    Cost($) for each dataset.  Output: ``src/overall_math_score.json``.

    Args:
        datasets: Dataset names to include in every entry.  Defaults to
            the three math benchmarks previously hard-coded here; kept
            as a parameter so other benchmark sets can reuse this
            writer.  Datasets a pair was never run on get explicit
            0.0 defaults so consumers can rely on a fixed schema.
    """
    df = pd.read_csv('src/record.csv')
    # Drop rows that are entirely empty, then normalize headers.
    df = df.dropna(how='all')
    df = df.rename(columns={
        'dataset': 'Dataset',
        'llm': 'LLM',
        'score\n(EM)': 'Score',
        'Cost($)': 'Cost($)',
        'Eval Date': 'Eval Date'
    })
    result = {
        "time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "results": {}
    }
    llms = df['LLM'].dropna().unique()
    for llm in llms:
        for algorithm in df['Algorithm'].dropna().unique():
            if not isinstance(algorithm, str):
                continue
            # gpt-3.5-turbo keeps the bare algorithm name (legacy keys);
            # every other model gets a '-<llm>' suffix for uniqueness.
            algo_key = algorithm if llm == 'gpt-3.5-turbo' else f"{algorithm}-{llm}"
            algo_data = df[(df['Algorithm'] == algorithm) & (df['LLM'] == llm)]
            if algo_data.empty:
                print(f"No data found for algorithm '{algorithm}' and LLM '{llm}'")
                continue
            result['results'][algo_key] = {
                "META": {
                    "Algorithm": algorithm,
                    "LLM": llm,
                    "Eval Date": str(algo_data['Eval Date'].iloc[0])
                }
            }
            for dataset in datasets:
                # Reuse the already-filtered pair rows instead of
                # re-filtering the whole DataFrame per dataset.
                dataset_data = algo_data[algo_data['Dataset'] == dataset]
                if not dataset_data.empty:
                    score = dataset_data['Score'].iloc[0]
                    cost = dataset_data['Cost($)'].iloc[0]
                    result['results'][algo_key][dataset] = {
                        "Score": float(score) if pd.notnull(score) and score != '-' else 0.0,
                        "Cost($)": float(cost) if pd.notnull(cost) and cost != '-' else 0.0
                    }
                else:
                    # Pair never evaluated on this dataset: keep the key
                    # with explicit zero defaults.
                    result['results'][algo_key][dataset] = {
                        "Score": 0.0,
                        "Cost($)": 0.0
                    }
    # Save as JSON file
    with open('src/overall_math_score.json', 'w', encoding='utf-8') as f:
        json.dump(result, f, indent=4, ensure_ascii=False)
def main():
    """Generate both JSON report files from src/record.csv."""
    process_csv_to_json()
    process_csv_to_overall_json()


if __name__ == "__main__":
    main()