oliverwang15 committed
Commit d29770b · 1 Parent(s): 8650c17

updates on the experiment

app.py CHANGED
@@ -35,7 +35,9 @@ with gr.Blocks(theme="dark") as demo:
         openai_key = gr.Textbox(
             label='Enter your OpenAI API key here',
             type='password')
-
+
+        model_selection = gr.Radio(choices = ["ChatGPT", "GPT4"], label="Model Selection", info="Please select the model you want to use")
+
         file = gr.File(label='Upload your .txt or .pdf file here', file_types=['.txt', '.pdf'], file_count = 'multiple')
         btn_submit_txt_online = gr.Button(value='Submit')
         # btn_submit_txt.style(full_width=True)
@@ -43,7 +45,7 @@ with gr.Blocks(theme="dark") as demo:
         with gr.Group():
             gr.Markdown(f'<center><h2>Or Load Offline</h2></center>')
             questions = gr.CheckboxGroup(choices = QUESTIONS, value = QUESTIONS, label="Questions (Please don't change this part now)", info="Please select the question you want to ask")
-            answer_type = gr.Radio(choices = ["ChatGPT_txt", "GPT4_txt", 'New_GPT_4_pdf'], label="Answer_type", info="Please select the type of answer you want to show")
+            answer_type = gr.Radio(choices = ["ChatGPT_txt", "GPT4_txt", 'New_GPT_4_pdf', 'Exp_text', 'Exp_Group_A', 'Exp_Group_B'], label="Answer_type", info="Please select the type of answer you want to show")
             btn_submit_txt_offline = gr.Button(value='Show Answers')
             # btn_submit_txt.style(full_width=True)
 
@@ -119,7 +121,7 @@ with gr.Blocks(theme="dark") as demo:
     # Submit button
     btn_submit_txt_online.click(
         backend.process_file_online,
-        inputs=[file, questions, openai_key],
+        inputs=[file, questions, openai_key, model_selection],
        outputs=[filename_box, question_box, answer_box, reference_box, highlighted_text, correct_answer, correct_reference],
     )
 
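For reference, this is roughly how the new `model_selection` value reaches the backend: Gradio passes each component listed in `inputs` to the click handler as a positional argument, in order, so the handler receives the selected label as a plain string ("ChatGPT" or "GPT4"). A minimal, self-contained sketch (the handler and component set below are simplified stand-ins, not the app's actual code):

```python
import gradio as gr

# Stand-in for backend.process_file_online: Gradio calls it with the current
# values of the components listed in `inputs`, in the same order.
def process_file_online(file, questions, openai_key, model_selection):
    # model_selection arrives as the selected radio label, e.g. "ChatGPT" or "GPT4"
    return f"{len(file or [])} file(s), {len(questions)} question(s), model = {model_selection}"

with gr.Blocks() as demo:
    openai_key = gr.Textbox(label='Enter your OpenAI API key here', type='password')
    model_selection = gr.Radio(choices=["ChatGPT", "GPT4"], label="Model Selection")
    file = gr.File(file_count='multiple')
    questions = gr.CheckboxGroup(choices=["Q1", "Q2"], value=["Q1", "Q2"])
    result = gr.Textbox(label='Result')
    gr.Button(value='Submit').click(
        process_file_online,
        inputs=[file, questions, openai_key, model_selection],
        outputs=[result],
    )

if __name__ == '__main__':
    demo.launch()
```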
backend.py CHANGED
@@ -73,7 +73,7 @@ class Backend:
 
         return text
 
-    def process_file_online(self, file, questions, openai_key, progress = gr.Progress()):
+    def process_file_online(self, file, questions, openai_key, model_selection, progress = gr.Progress()):
         # record the questions
         self.questions = questions
 
@@ -83,10 +83,16 @@ class Backend:
         # make the prompt
         prompt_list = [self.prompt.get(text, questions, 'v3') for text in self.text_list]
 
+        # select the model
+        if model_selection == 'ChatGPT':
+            model = 'gpt-3.5-turbo-16k'
+        elif model_selection == 'GPT4':
+            model = 'gpt-4-1106-preview'
+
         # interact with openai
         self.res_list = []
         for prompt in progress.tqdm(prompt_list, desc = 'Generating answers...'):
-            res = self.agent(prompt, with_history = False, temperature = 0.1, model = 'gpt-3.5-turbo-16k', api_key = openai_key)
+            res = self.agent(prompt, with_history = False, temperature = 0.1, model = model, api_key = openai_key)
             res = self.prompt.process_result(res, 'v3')
             self.res_list.append(res)
 
@@ -362,29 +368,42 @@ class Backend:
             df = pd.read_csv('./offline_results/results_all_gpt4.csv')
         elif answer_type == 'New_GPT_4_pdf':
             df = pd.read_csv('./offline_results/results_new_pdf.csv')
+        elif answer_type == 'Exp_text':
+            df = pd.read_csv('./offline_results/exp_test.csv')
+        elif answer_type == 'Exp_Group_A':
+            df = pd.read_csv('./offline_results/exp_ga.csv')
+        elif answer_type == 'Exp_Group_B':
+            df = pd.read_csv('./offline_results/exp_gb.csv')
 
         # make the prompt
         self.res_list = self.phase_df(df)
-        if answer_type == 'ChatGPT_txt' or answer_type == 'GPT4_txt':
-            txt_root_path = './20230808-AI coding-1st round'
-            self.filename_list = df['fn'].tolist()
-        elif answer_type == 'New_GPT_4_pdf':
-            txt_root_path = './new_pdfs'
+
+        if answer_type in ['ChatGPT_txt', 'GPT4_txt', 'New_GPT_4_pdf']:
+            if answer_type == 'ChatGPT_txt' or answer_type == 'GPT4_txt':
+                txt_root_path = './20230808-AI coding-1st round'
+                self.filename_list = df['fn'].tolist()
+            elif answer_type == 'New_GPT_4_pdf':
+                txt_root_path = './new_pdfs'
+                self.filename_list = df['fn'].tolist()
+                self.filename_list = ['.'.join(f.split('.')[:-1]) + '.txt' for f in self.filename_list]
+
+            self.text_list = []
+            for file in progress.tqdm(self.filename_list):
+                if file.split('.')[-1] == 'pdf':
+                    # convert pdf to txt
+                    text = self.phrase_pdf(os.path.join(txt_root_path, file))
+
+                else:
+                    text_path = os.path.join(txt_root_path, file)
+                    with open(text_path, 'r', encoding='utf-8') as f:
+                        text = f.read()
+
+                self.text_list.append(text)
+
+        elif answer_type in ['Exp_text', 'Exp_Group_A', 'Exp_Group_B']:
             self.filename_list = df['fn'].tolist()
-            self.filename_list = ['.'.join(f.split('.')[:-1]) + '.txt' for f in self.filename_list]
+            self.text_list = df['content'].tolist()
 
-        self.text_list = []
-        for file in progress.tqdm(self.filename_list):
-            if file.split('.')[-1] == 'pdf':
-                # convert pdf to txt
-                text = self.phrase_pdf(os.path.join(txt_root_path, file))
-
-            else:
-                text_path = os.path.join(txt_root_path, file)
-                with open(text_path, 'r', encoding='utf-8') as f:
-                    text = f.read()
-
-            self.text_list.append(text)
 
         # Use the first file as default
         # Use the first question for multiple questions
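One note on the model-selection branch added to `process_file_online`: if the radio is left unselected, `model_selection` comes through as `None`, neither branch of the new if/elif runs, and `model` is never bound, so the subsequent `self.agent(...)` call would fail with a `NameError`. A dict lookup is a compact alternative that keeps the label-to-model mapping in one place and fails with a clearer message (sketch only; `MODEL_BY_LABEL` and `resolve_model` are illustrative names, not identifiers from this repo):

```python
# Illustrative sketch, not repo code: map UI labels to OpenAI model ids.
MODEL_BY_LABEL = {
    'ChatGPT': 'gpt-3.5-turbo-16k',
    'GPT4': 'gpt-4-1106-preview',
}

def resolve_model(model_selection):
    # Fail loudly on an unselected or unknown radio value instead of leaving
    # `model` undefined at the API call site.
    try:
        return MODEL_BY_LABEL[model_selection]
    except KeyError:
        raise ValueError(f"Please select a model; got {model_selection!r}")
```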
offline_results/exp_ga.csv ADDED
The diff for this file is too large to render. See raw diff
 
offline_results/exp_gb.csv ADDED
The diff for this file is too large to render. See raw diff
 
offline_results/exp_test.csv ADDED
The diff for this file is too large to render. See raw diff
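Per the `Exp_text` / `Exp_Group_A` / `Exp_Group_B` branch added in backend.py, these three CSVs are expected to carry the source text inline rather than pointing at .txt or .pdf files on disk: an `fn` column with the document identifier and a `content` column with the full text, alongside whatever answer columns `phase_df` consumes (not visible in this diff). A minimal read of just those two columns (illustrative; the CSVs themselves are not rendered here):

```python
import pandas as pd

# Column names 'fn' and 'content' come from the backend.py change above;
# any other columns in the experiment CSVs are unknown from this diff.
df = pd.read_csv('./offline_results/exp_ga.csv')
filename_list = df['fn'].tolist()       # document identifiers
text_list = df['content'].tolist()      # full text, so no file reads are needed
```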