Pratik Bhavsar commited on
Commit
9941c78
·
1 Parent(s): f901e89

added new mistral medium

Browse files
Files changed (1) hide show
  1. results_v2.csv +1 -1
results_v2.csv CHANGED
@@ -1,5 +1,6 @@
1
  Model,Vendor,Avg AC,Avg TSQ,Avg Total Cost,Avg Session Duration,Avg Turns,Banking AC,Healthcare AC,Insurance AC,Investment AC,Telecom AC,Banking TSQ,Healthcare TSQ,Insurance TSQ,Investment TSQ,Telecom TSQ,Avg Input Cost ($),Avg Output Cost ($),Banking Cost,Healthcare Cost,Insurance Cost,Investment Cost,Telecom Cost,Banking Duration,Healthcare Duration,Insurance Duration,Investment Duration,Telecom Duration,Banking Turns,Healthcare Turns,Insurance Turns,Investment Turns,Telecom Turns,Model Type,$/M input token,$/M output token,Output Type
2
  gpt-4.1-2025-04-14,OpenAI,0.62,0.8,0.0684,24.32,3.1,0.6,0.62,0.66,0.64,0.58,0.81,0.83,0.68,0.88,0.82,0.0577,0.0107,0.052,0.0711,0.0629,0.0777,0.0783,18.52,24.4,25.24,27.88,25.58,2.61,3.15,2.92,3.3,3.48,Proprietary,2.0,8.0,Normal
 
3
  gpt-4.1-mini-2025-04-14,OpenAI,0.56,0.79,0.0141,26.0,3.43,0.56,0.6,0.46,0.5,0.64,0.8,0.85,0.63,0.84,0.83,0.0123,0.0018,0.0115,0.0143,0.0131,0.0164,0.0156,21.28,26.82,23.32,30.5,28.07,2.99,3.32,3.28,3.76,3.79,Proprietary,0.4,1.6,Normal
4
  claude-sonnet-4-20250514,Anthropic,0.55,0.92,0.1537,66.6,2.89,0.58,0.62,0.53,0.49,0.53,0.9,0.95,0.93,0.92,0.9,0.1212,0.0325,0.1359,0.1542,0.1442,0.1669,0.1675,55.36,57.93,56.87,86.44,76.38,2.54,2.84,2.79,3.06,3.22,Proprietary,3.0,15.0,Normal
5
  kimi-k2-instruct,Moonshot AI,0.53,0.9,0.0386,163.62,2.84,0.58,0.49,0.58,0.47,0.53,0.89,0.91,0.88,0.93,0.91,0.0346,0.004,0.0344,0.0401,0.0367,0.0419,0.0397,165.45,155.42,164.9,161.14,171.17,2.58,2.81,2.79,3.1,2.93,Open source,1.0,3.0,Normal
@@ -20,4 +21,3 @@ llama-3.3-70b-instruct,Meta,0.2,0.62,0.0599,19.92,3.83,0.11,0.29,0.29,0.14,0.16,
20
  caller,Arcee,0.16,0.65,0.0297,25.66,4.2,0.23,0.14,0.22,0.09,0.12,0.69,0.6,0.68,0.61,0.67,0.0282,0.0015,0.0262,0.0303,0.0305,0.0331,0.0286,22.83,25.54,26.42,29.66,23.85,3.76,4.19,4.18,4.75,4.14,Open source,0.55,0.85,Normal
21
  nova-lite-v1,Amazon,0.16,0.55,0.0031,20.26,3.73,0.12,0.18,0.19,0.15,0.18,0.48,0.49,0.58,0.72,0.49,0.0027,0.0004,0.0026,0.0033,0.0031,0.0034,0.0029,17.53,20.61,19.67,24.28,19.2,3.31,4.13,3.57,4.04,3.62,Proprietary,0.06,0.24,Normal
22
  magistral-small-2506,Mistral,0.16,0.53,0.0301,17.42,5.68,0.23,0.18,0.13,0.16,0.12,0.57,0.46,0.42,0.62,0.6,0.0275,0.0026,0.0245,0.0335,0.0302,0.034,0.0281,14.53,21.36,14.65,19.67,16.87,4.74,6.28,6.14,6.06,5.19,Open source,0.5,1.5,Reasoning
23
- mistral-medium-2505,Mistral,0.16,0.52,0.0293,34.17,6.27,0.2,0.16,0.18,0.13,0.13,0.45,0.5,0.46,0.63,0.56,0.0256,0.0037,0.025,0.037,0.0328,0.0269,0.0251,30.07,39.7,36.76,31.84,32.49,5.61,7.75,7.08,5.68,5.23,Proprietary,0.4,2.0,Normal
 
1
  Model,Vendor,Avg AC,Avg TSQ,Avg Total Cost,Avg Session Duration,Avg Turns,Banking AC,Healthcare AC,Insurance AC,Investment AC,Telecom AC,Banking TSQ,Healthcare TSQ,Insurance TSQ,Investment TSQ,Telecom TSQ,Avg Input Cost ($),Avg Output Cost ($),Banking Cost,Healthcare Cost,Insurance Cost,Investment Cost,Telecom Cost,Banking Duration,Healthcare Duration,Insurance Duration,Investment Duration,Telecom Duration,Banking Turns,Healthcare Turns,Insurance Turns,Investment Turns,Telecom Turns,Model Type,$/M input token,$/M output token,Output Type
2
  gpt-4.1-2025-04-14,OpenAI,0.62,0.8,0.0684,24.32,3.1,0.6,0.62,0.66,0.64,0.58,0.81,0.83,0.68,0.88,0.82,0.0577,0.0107,0.052,0.0711,0.0629,0.0777,0.0783,18.52,24.4,25.24,27.88,25.58,2.61,3.15,2.92,3.3,3.48,Proprietary,2.0,8.0,Normal
3
+ mistral-medium-2508,Mistral,0.61,0.77,0.0199,37.45,2.98,0.57,0.6,0.7,0.57,0.59,0.74,0.75,0.73,0.87,0.76,0.0164,0.0035,0.0185,0.0196,0.0195,0.0223,0.0195,31.91,34.94,36.96,43.91,39.53,3.06,2.85,2.92,3.12,2.97,Proprietary,0.4,2.0,Normal
4
  gpt-4.1-mini-2025-04-14,OpenAI,0.56,0.79,0.0141,26.0,3.43,0.56,0.6,0.46,0.5,0.64,0.8,0.85,0.63,0.84,0.83,0.0123,0.0018,0.0115,0.0143,0.0131,0.0164,0.0156,21.28,26.82,23.32,30.5,28.07,2.99,3.32,3.28,3.76,3.79,Proprietary,0.4,1.6,Normal
5
  claude-sonnet-4-20250514,Anthropic,0.55,0.92,0.1537,66.6,2.89,0.58,0.62,0.53,0.49,0.53,0.9,0.95,0.93,0.92,0.9,0.1212,0.0325,0.1359,0.1542,0.1442,0.1669,0.1675,55.36,57.93,56.87,86.44,76.38,2.54,2.84,2.79,3.06,3.22,Proprietary,3.0,15.0,Normal
6
  kimi-k2-instruct,Moonshot AI,0.53,0.9,0.0386,163.62,2.84,0.58,0.49,0.58,0.47,0.53,0.89,0.91,0.88,0.93,0.91,0.0346,0.004,0.0344,0.0401,0.0367,0.0419,0.0397,165.45,155.42,164.9,161.14,171.17,2.58,2.81,2.79,3.1,2.93,Open source,1.0,3.0,Normal
 
21
  caller,Arcee,0.16,0.65,0.0297,25.66,4.2,0.23,0.14,0.22,0.09,0.12,0.69,0.6,0.68,0.61,0.67,0.0282,0.0015,0.0262,0.0303,0.0305,0.0331,0.0286,22.83,25.54,26.42,29.66,23.85,3.76,4.19,4.18,4.75,4.14,Open source,0.55,0.85,Normal
22
  nova-lite-v1,Amazon,0.16,0.55,0.0031,20.26,3.73,0.12,0.18,0.19,0.15,0.18,0.48,0.49,0.58,0.72,0.49,0.0027,0.0004,0.0026,0.0033,0.0031,0.0034,0.0029,17.53,20.61,19.67,24.28,19.2,3.31,4.13,3.57,4.04,3.62,Proprietary,0.06,0.24,Normal
23
  magistral-small-2506,Mistral,0.16,0.53,0.0301,17.42,5.68,0.23,0.18,0.13,0.16,0.12,0.57,0.46,0.42,0.62,0.6,0.0275,0.0026,0.0245,0.0335,0.0302,0.034,0.0281,14.53,21.36,14.65,19.67,16.87,4.74,6.28,6.14,6.06,5.19,Open source,0.5,1.5,Reasoning