File size: 20,242 Bytes
fdcf80f
a71589c
 
 
 
25096bb
a71589c
aac308a
68ebf3b
 
b074ed1
a71589c
 
 
 
 
 
 
68ebf3b
 
 
 
 
 
 
 
 
 
e54ab55
 
 
 
 
 
7fcd57a
e54ab55
 
 
 
 
 
 
7fcd57a
e54ab55
 
 
 
 
 
c542faf
05b511a
c542faf
2aa8f92
c8005a5
0072900
 
 
b9eb2d7
a71589c
 
1f75214
25096bb
 
 
 
 
54536dd
25096bb
 
 
 
 
1d16515
25096bb
a71589c
42e9076
4cb1f6b
 
aac308a
bc935c0
2887740
bc935c0
42e9076
fdcf80f
4cb1f6b
 
 
a71589c
 
 
 
 
 
 
 
 
 
 
4cb1f6b
fdcf80f
a71589c
 
 
 
 
 
 
 
 
 
 
 
 
66af638
 
a71589c
66af638
 
a71589c
66af638
a71589c
9f857bc
a71589c
16b20e5
66af638
a71589c
68ebf3b
 
66af638
a71589c
66af638
a71589c
66af638
a71589c
66af638
a71589c
68ebf3b
e54ab55
68ebf3b
e54ab55
66af638
 
 
68ebf3b
aac308a
 
 
4cb1f6b
4417df5
aac308a
 
a71589c
4417df5
aac308a
 
a71589c
 
 
 
4417df5
a71589c
 
 
4417df5
a71589c
 
aac308a
 
a71589c
 
4cb1f6b
 
a71589c
 
 
4adfce5
 
 
 
 
 
a71589c
 
4417df5
 
a71589c
 
 
 
 
 
 
 
 
 
 
 
4417df5
a71589c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
293ffcd
aac308a
 
4cb1f6b
 
a71589c
 
 
4cb1f6b
4a8ce3f
4cb1f6b
a71589c
4cb1f6b
a71589c
 
4417df5
a71589c
4cb1f6b
 
aac308a
a71589c
 
4cb1f6b
 
a71589c
 
 
 
4cb1f6b
a71589c
 
 
 
4cb1f6b
a71589c
aac308a
a71589c
4cb1f6b
a71589c
c542faf
a71589c
 
 
05b511a
9a338f5
 
05b511a
a71589c
9a338f5
 
 
 
a71589c
9a338f5
 
 
a71589c
 
7d020dd
a71589c
e54ab55
 
 
05b511a
c542faf
 
e54ab55
 
05b511a
c542faf
 
a71589c
4cb1f6b
a71589c
c542faf
a71589c
 
 
9a338f5
 
 
78733fd
a71589c
9a338f5
 
 
 
a71589c
9a338f5
 
 
a71589c
 
3697927
a71589c
4417df5
 
 
 
 
 
 
 
 
 
 
a71589c
4cb1f6b
a71589c
66af638
4417df5
a71589c
 
9a338f5
 
 
78733fd
a71589c
9a338f5
 
 
 
a71589c
9a338f5
 
 
a71589c
 
dd0a469
66af638
 
 
 
 
 
 
 
 
 
 
4417df5
a71589c
 
aac308a
4417df5
e114200
aac308a
a71589c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
import gradio as gr
import json
import requests
import os
from model_inference import Inference
import time
HF_TOKEN = os.environ.get("HF_TOKEN")

question_selector_map = {}

model_list = ["llama2", "llama2-chat", "vicuna", "falcon", "falcon-instruct", "orca", "wizardlm"]

with open("src/inference_endpoint.json", "r") as f:
    inference_endpoint = json.load(f)

for i in range(len(model_list)):
    inference_endpoint[model_list[i]]["headers"]["Authorization"] += HF_TOKEN

def build_question_selector_map(questions):
    question_selector_map = {}

    # Build question selector map
    for q in questions:
        preview = f"{q['question_id']+1}: " + q["question"][:128] + "..."
        question_selector_map[preview] = q

    return question_selector_map

def math_display_question_answer(question, cot, request: gr.Request):
    if cot:
        q = math_cot_question_selector_map[question]
    else:
        q = math_question_selector_map[question]

    return q["agent_response"]["llama"][0], q["agent_response"]["wizardlm"][0], q["agent_response"]["orca"][0], q["summarization"][0], q["agent_response"]["llama"][1], q["agent_response"]["wizardlm"][1], q["agent_response"]["orca"][1], q["summarization"][1],  q["agent_response"]["llama"][2], q["agent_response"]["wizardlm"][2], q["agent_response"]["orca"][2]

def gsm_display_question_answer(question, cot, request: gr.Request):
    if cot:
        q = gsm_cot_question_selector_map[question]
    else:
        q = gsm_question_selector_map[question]

    return q["agent_response"]["llama"][0], q["agent_response"]["wizardlm"][0], q["agent_response"]["orca"][0], q["summarization"][0], q["agent_response"]["llama"][1], q["agent_response"]["wizardlm"][1], q["agent_response"]["orca"][1], q["summarization"][1],  q["agent_response"]["llama"][2], q["agent_response"]["wizardlm"][2], q["agent_response"]["orca"][2]

def mmlu_display_question_answer(question, cot, request: gr.Request):
    if cot:
        q = mmlu_cot_question_selector_map[question]
    else:
        q = mmlu_question_selector_map[question]

    return q["agent_response"]["llama"][0], q["agent_response"]["wizardlm"][0], q["agent_response"]["orca"][0], q["summarization"][0], q["agent_response"]["llama"][1], q["agent_response"]["wizardlm"][1], q["agent_response"]["orca"][1], q["summarization"][1], q["agent_response"]["llama"][2], q["agent_response"]["wizardlm"][2], q["agent_response"]["orca"][2]
    
warmup_test = ["llama2", "wizardlm", "orca"]
def warmup(model_list=warmup_test, model_inference_endpoints=inference_endpoint):
    for model in model_list:
        API_URL = model_inference_endpoints[model]["API_URL"]
        headers = model_inference_endpoints[model]["headers"]
        headers["Authorization"] += HF_TOKEN

        def query(payload):
            return requests.post(API_URL, headers=headers, json=payload)
            
        output = query({
            "inputs": "Hello. "
        })
        
    time.sleep(1)
    return {
        options: gr.update(visible=True),
        inputbox: gr.update(visible=True),
        submit: gr.update(visible=True),
        warmup_button: gr.update(visible=False),
        welcome_message: gr.update(visible=True)
    }

def inference(model_list, question, API_KEY, cot, hf_token=HF_TOKEN):
    if len(model_list) != 3:
        raise gr.Error("Please choose just '3' models! Neither more nor less!")

    for i in range(len(model_list)):
        model_list[i] = model_list[i].lower()

    model_response = Inference(model_list, question, API_KEY, cot, hf_token)

    return {
        output_msg: gr.update(visible=True),
        output_col: gr.update(visible=True),
        model1_output1: model_response["agent_response"][model_list[0]][0],
        model2_output1: model_response["agent_response"][model_list[1]][0],
        model3_output1: model_response["agent_response"][model_list[2]][0],
        summarization_text1: model_response["summarization"][0],
        model1_output2: model_response["agent_response"][model_list[0]][1],
        model2_output2: model_response["agent_response"][model_list[1]][1],
        model3_output2: model_response["agent_response"][model_list[2]][1],
        summarization_text2: model_response["summarization"][1],
        model1_output3: model_response["agent_response"][model_list[0]][2],
        model2_output3: model_response["agent_response"][model_list[1]][2],
        model3_output3: model_response["agent_response"][model_list[2]][2]
    }

def load_responses():
    with open("result/Math/math_result.json", "r") as math_file:
        math_responses = json.load(math_file)

    with open("result/Math/math_result_cot.json", "r") as math_cot_file:
        math_cot_responses = json.load(math_cot_file)

    with open("result/GSM8K/gsm_result.json", "r") as gsm_file:
        gsm_responses = json.load(gsm_file)

    with open("result/GSM8K/gsm_result_cot.json", "r") as gsm_cot_file:
        gsm_cot_responses = json.load(gsm_cot_file)

    with open("result/MMLU/mmlu_result.json", "r") as mmlu_file:
        mmlu_responses = json.load(mmlu_file)

    with open("result/MMLU/mmlu_result_cot.json", "r") as mmlu_cot_file:
        mmlu_cot_responses = json.load(mmlu_cot_file)
    
    return math_responses, math_cot_responses, gsm_responses, gsm_cot_responses, mmlu_responses, mmlu_cot_responses

def load_questions(math, gsm, mmlu):
    math_questions = []
    gsm_questions = []
    mmlu_questions = []
    for i in range(100):
        math_questions.append(f"{i+1}: " + math[i]["question"][:128] + "...")
        gsm_questions.append(f"{i+1}: " + gsm[i]["question"][:128] + "...")
        mmlu_questions.append(f"{i+1}: " + mmlu[i]["question"][:128] + "...")

    return math_questions, gsm_questions, mmlu_questions

math_result, math_cot_result, gsm_result, gsm_cot_result, mmlu_result, mmlu_cot_result = load_responses()

math_questions, gsm_questions, mmlu_questions = load_questions(math_result, gsm_result, mmlu_result)

math_question_selector_map = build_question_selector_map(math_result)
math_cot_question_selector_map = build_question_selector_map(math_cot_result)
gsm_question_selector_map = build_question_selector_map(gsm_result)
gsm_cot_question_selector_map = build_question_selector_map(gsm_cot_result)
mmlu_question_selector_map = build_question_selector_map(mmlu_result)
mmlu_cot_question_selector_map = build_question_selector_map(mmlu_cot_result)


TITLE = """<h1 align="center">LLM Agora 🗣️🏦</h1>"""

INTRODUCTION_TEXT = """
The **LLM Agora** 🗣️🏦 aims to improve the quality of open-source LMs' responses through debate & revision introduced in [Improving Factuality and Reasoning in Language Models through Multiagent Debate](https://arxiv.org/abs/2305.14325).
Thank you to the authors of this paper for suggesting a great idea!

Do you know that? 🤔 **LLMs can also improve their responses by debating with other LLMs**! 😮 We applied this concept to several open-source LMs to verify that the open-source model, not the proprietary one, can sufficiently improve the response through discussion. 🤗
For more details, please refer to the [GitHub Repository](https://github.com/gauss5930/LLM-Agora).
You can also check the results in this Space!

You can use LLM Agora with your own questions if the response of open-source LM is not satisfactory and you want to improve the quality!
The Math, GSM8K, and MMLU Tabs show the results of the experiment(Llama2, WizardLM2, Orca2), and for inference, please use the 'Inference' tab.

Here's how to use LLM Agora!

1. Before starting, click the 'Warm-up LLM Agora 🔥' button and wait until 'LLM Agora Ready!!' appears. (Suggest to go grab a coffee☕ since it takes 5 minutes!)
2. Choose just 3 models! Neither more nor less!
3. Check the CoT box if you want to utilize the Chain-of-Thought while inferencing.
4. Please fill in your OpenAI API KEY, it will be used to use ChatGPT to summarize the responses.
5. Type your question in the Question box and click the 'Submit' button! If you do so, LLM Agora will show you improved answers! 🤗 (It will take roughly a minute! Please wait for an answer!)

For more detailed information, please check '※ Specific information about LLM Agora' at the bottom of the page.
"""

WELCOME_TEXT = """<h1 align="center">🤗🔥 Welcome to LLM Agora 🔥🤗</h1>"""

RESPONSE_TEXT = """<h1 align="center">🤗 Here are the responses to each model!! 🤗</h1>"""

SPECIFIC_INFORMATION = """
This is the specific information about LLM Agora!

**Tasks**

- Math: The problem of arithmetic operations on six randomly selected numbers. The format is '{}+{}*{}+{}-{}*{}=?'
- GSM8K: GSM8K is a dataset of 8.5K high quality linguistically diverse grade school math word problems created by human problem writers.
- MMLU: MMLU (Massive Multitask Language Understanding) is a new benchmark designed to measure knowledge acquired during pretraining by evaluating models exclusively in zero-shot and few-shot settings.

**Model size**

Besides Falcon, all other models are based on Llama2.

|Model name|Model size|
|---|---|
|Llama2|13B|
|Llama2-Chat|13B|
|Vicuna|13B|
|Falcon|7B|
|Falcon-Instruct|7B|
|WizardLM|13B|
|Orca|13B|

**Agent numbers & Debate rounds**

- We limit the number of agents and debate rounds because of the limitation of resources. As a result, we decided to use 3 agents and 2 rounds of debate!

**GitHub Repository**

- If you want to see more specific information, please check the [GitHub Repository](https://github.com/gauss5930/LLM-Agora) of LLM Agora!

**Citation**

```
@article{du2023improving,
  title={Improving Factuality and Reasoning in Language Models through Multiagent Debate},
  author={Du, Yilun and Li, Shuang and Torralba, Antonio and Tenenbaum, Joshua B and Mordatch, Igor},
  journal={arXiv preprint arXiv:2305.14325},
  year={2023}
}
```
"""

with gr.Blocks() as demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT)
    with gr.Column():
        with gr.Tab("Inference"):
            warmup_button = gr.Button("Warm-up LLM Agora 🔥", visible=True)
            welcome_message = gr.HTML(WELCOME_TEXT, visible=False)
            with gr.Row(visible=False) as options:
                with gr.Column():
                    model_list = gr.CheckboxGroup(["Llama2", "Llama2-Chat", "Vicuna", "Falcon", "Falcon-Instruct", "WizardLM", "Orca"], label="Model Selection", info="Choose 3 LMs to participate in LLM Agora.", type="value")
                    cot = gr.Checkbox(label="CoT", info="Do you want to use CoT for inference?")
                with gr.Column() as inputbox:
                    API_KEY = gr.Textbox(label="OpenAI API Key", value="", info="Please fill in your OpenAI API token.", placeholder="sk..", type="password")
            with gr.Column(visible=False) as inputbox:
                question = gr.Textbox(label="Question", value="", info="Please type your question!", placeholder="")
            submit = gr.Button("Submit", visible=False)

            with gr.Row(visible=False) as output_msg:
                gr.HTML(RESPONSE_TEXT)

            with gr.Column(visible=False) as output_col:
                with gr.Row(elem_id="model1_response"):
                    model1_output1 = gr.Textbox(label="1️⃣ model's initial response")
                    model2_output1 = gr.Textbox(label="2️⃣ model's initial response")
                    model3_output1 = gr.Textbox(label="3️⃣ model's initial response")
                summarization_text1 = gr.Textbox(lebel="Summarization 1")
                with gr.Row(elem_id="model2_response"):
                    model1_output2 = gr.Textbox(label="1️⃣ model's revised response")
                    model2_output2 = gr.Textbox(label="2️⃣ model's revised response")
                    model3_output2 = gr.Textbox(label="3️⃣ model's revised response")
                summarization_text2 = gr.Textbox(label="Summarization 2")
                with gr.Row(elem_id="model3_response"):
                    model1_output3 = gr.Textbox(label="1️⃣ model's final response")
                    model2_output3 = gr.Textbox(label="2️⃣ model's final response")
                    model3_output3 = gr.Textbox(label="3️⃣ model's final response")

        
        with gr.Tab("Math"):
            math_cot = gr.Checkbox(label="CoT", info="If you want to see CoT result, please check the box.")
            math_question_list = gr.Dropdown(math_questions, value=callable(math_questions), label="Math Question", every=0.1)

            with gr.Column():
                with gr.Row(elem_id="model1_response"):
                    math_model1_output1 = gr.Textbox(label="Llama2🦙's 1️⃣st response")
                    math_model2_output1 = gr.Textbox(label="WizardLM🧙‍♂️'s 1️⃣st response")
                    math_model3_output1 = gr.Textbox(label="Orca🐬's 1️⃣st response")
                math_summarization_text1 = gr.Textbox(label="Summarization 1️⃣")
                with gr.Row(elem_id="model2_response"):
                    math_model1_output2 = gr.Textbox(label="Llama2🦙's 2️⃣nd response")
                    math_model2_output2 = gr.Textbox(label="WizardLM🧙‍♂️'s 2️⃣nd response")
                    math_model3_output2 = gr.Textbox(label="Orca🐬's 2️⃣nd response")
                math_summarization_text2 = gr.Textbox(label="Summarization 2️⃣")
                with gr.Row(elem_id="model3_response"):
                    math_model1_output3 = gr.Textbox(label="Llama2🦙's 3️⃣rd response")
                    math_model2_output3 = gr.Textbox(label="WizardLM🧙‍♂️'s 3️⃣rd response")
                    math_model3_output3 = gr.Textbox(label="Orca🐬's 3️⃣rd response")

            gr.HTML("""<h1 align="center"> The result of Math </h1>""")
            gr.HTML("""<p align="center"><img src='https://github.com/gauss5930/LLM-Agora/assets/80087878/4fc22896-1306-4a93-bd54-a7a2ff184c98'></p>""")

        math_cot.select(
            math_display_question_answer,
            [math_question_list, math_cot],
            [math_model1_output1, math_model2_output1, math_model3_output1, math_summarization_text1, math_model1_output2, math_model2_output2, math_model3_output2, math_summarization_text2, math_model1_output3, math_model2_output3, math_model3_output3]
        )
        math_question_list.change(
            math_display_question_answer,
            [math_question_list, math_cot],
            [math_model1_output1, math_model2_output1, math_model3_output1, math_summarization_text1, math_model1_output2, math_model2_output2, math_model3_output2, math_summarization_text2, math_model1_output3, math_model2_output3, math_model3_output3]
        )


        with gr.Tab("GSM8K"):
            gsm_cot = gr.Checkbox(label="CoT", info="If you want to see CoT result, please check the box.")
            gsm_question_list = gr.Dropdown(gsm_questions, value=callable(gsm_questions), label="GSM8K Question", every=0.1)

            with gr.Column():
                with gr.Row(elem_id="model1_response"):
                    gsm_model1_output1 = gr.Textbox(label="Llama2🦙's 1️⃣st response")
                    gsm_model2_output1 = gr.Textbox(label="WizardLM🧙‍♂️'s 1️⃣st response")
                    gsm_model3_output1 = gr.Textbox(label="Orca🐬's 1️⃣st response")
                gsm_summarization_text1 = gr.Textbox(label="Summarization 1️⃣")
                with gr.Row(elem_id="model2_response"):
                    gsm_model1_output2 = gr.Textbox(label="Llama2🦙's 2️⃣nd response")
                    gsm_model2_output2 = gr.Textbox(label="WizardLM🧙‍♂️'s 2️⃣nd response")
                    gsm_model3_output2 = gr.Textbox(label="Orca🐬's 2️⃣nd response")
                gsm_summarization_text2 = gr.Textbox(label="Summarization 2️⃣")
                with gr.Row(elem_id="model3_response"):
                    gsm_model1_output3 = gr.Textbox(label="Llama2🦙's 3️⃣rd response")
                    gsm_model2_output3 = gr.Textbox(label="WizardLM🧙‍♂️'s 3️⃣rd response")
                    gsm_model3_output3 = gr.Textbox(label="Orca🐬's 3️⃣rd response")

            gr.HTML("""<h1 align="center"> The result of GSM8K </h1>""")
            gr.HTML("""<p align="center"><img src="https://github.com/gauss5930/LLM-Agora/assets/80087878/64f05ea4-5bec-41e4-83d7-d8855e753290"></p>""")

        gsm_cot.select(
            gsm_display_question_answer,
            [gsm_question_list, gsm_cot],
            [gsm_model1_output1, gsm_model2_output1, gsm_model3_output1, gsm_summarization_text1, gsm_model1_output2, gsm_model2_output2, gsm_model3_output2, gsm_summarization_text2, gsm_model1_output3, gsm_model2_output3, gsm_model3_output3]
        )
        gsm_question_list.change(
            gsm_display_question_answer,
            [gsm_question_list, gsm_cot],
            [gsm_model1_output1, gsm_model2_output1, gsm_model3_output1, gsm_summarization_text1, gsm_model1_output2, gsm_model2_output2, gsm_model3_output2, gsm_summarization_text2, gsm_model1_output3, gsm_model2_output3, gsm_model3_output3]
        )


        with gr.Tab("MMLU"):
            mmlu_cot = gr.Checkbox(label="CoT", info="If you want to see CoT result, please check the box.")
            mmlu_question_list = gr.Dropdown(mmlu_questions, value=callable(mmlu_questions), label="MMLU Question", every=0.1)
            
            with gr.Column():
                with gr.Row(elem_id="model1_response"):
                    mmlu_model1_output1 = gr.Textbox(label="Llama2🦙's 1️⃣st response")
                    mmlu_model2_output1 = gr.Textbox(label="WizardLM🧙‍♂️'s 1️⃣st response")
                    mmlu_model3_output1 = gr.Textbox(label="Orca🐬's 1️⃣st response")
                mmlu_summarization_text1 = gr.Textbox(label="Summarization 1️⃣")
                with gr.Row(elem_id="model2_response"):
                    mmlu_model1_output2 = gr.Textbox(label="Llama2🦙's 2️⃣nd response")
                    mmlu_model2_output2 = gr.Textbox(label="WizardLM🧙‍♂️'s 2️⃣nd response")
                    mmlu_model3_output2 = gr.Textbox(label="Orca🐬's 2️⃣nd response")
                mmlu_summarization_text2 = gr.Textbox(label="Summarization 2️⃣")
                with gr.Row(elem_id="model3_response"):
                    mmlu_model1_output3 = gr.Textbox(label="Llama2🦙's 3️⃣rd response")
                    mmlu_model2_output3 = gr.Textbox(label="WizardLM🧙‍♂️'s 3️⃣rd response")
                    mmlu_model3_output3 = gr.Textbox(label="Orca🐬's 3️⃣rd response")

            gr.HTML("""<h1 align="center"> The result of MMLU </h1>""")
            gr.HTML("""<p align="center"><img src="https://github.com/composable-models/llm_multiagent_debate/assets/80087878/963571aa-228b-4d73-9082-5f528552383e"></p>""")

        mmlu_cot.select(
            mmlu_display_question_answer,
            [mmlu_question_list, mmlu_cot],
            [mmlu_model1_output1, mmlu_model2_output1, mmlu_model3_output1, mmlu_summarization_text1, mmlu_model1_output2, mmlu_model2_output2, mmlu_model3_output2, mmlu_summarization_text2, mmlu_model1_output3, mmlu_model2_output3, mmlu_model3_output3]
        )
        mmlu_question_list.change(
            mmlu_display_question_answer,
            [mmlu_question_list, mmlu_cot],
            [mmlu_model1_output1, mmlu_model2_output1, mmlu_model3_output1, mmlu_summarization_text1, mmlu_model1_output2, mmlu_model2_output2, mmlu_model3_output2, mmlu_summarization_text2, mmlu_model1_output3, mmlu_model2_output3, mmlu_model3_output3]
        )

        with gr.Accordion("※ Specific information about LLM Agora", open=False):
            gr.Markdown(SPECIFIC_INFORMATION)

    warmup_button.click(warmup, [], [options, inputbox, submit, warmup_button, welcome_message])
    submit.click(inference, [model_list, question, API_KEY, cot], [output_msg, output_col, model1_output1, model2_output1, model3_output1, summarization_text1, model1_output2, model2_output2, model3_output2, summarization_text2, model1_output3, model2_output3, model3_output3])

demo.launch()