abdeljalilELmajjodi commited on
Commit
7e920cb
·
verified ·
1 Parent(s): 69b7702

add utils...

Browse files
Files changed (1) hide show
  1. utils.py +148 -0
utils.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from openai import OpenAI
import os
from tqdm import tqdm
import pandas as pd
from pathlib import Path
from datasets import Dataset,load_dataset,concatenate_datasets
import asyncio
import threading
from dotenv import load_dotenv

# Pull secrets from a local .env file into the process environment.
load_dotenv()
# Hugging Face Hub tokens. Indexing (not .get) is deliberate: fail fast at
# import time if the tokens are missing, since the app cannot work without them.
HF_READ=os.environ["HF_READ"]
HF_WRITE=os.environ["HF_WRITE"]

# Registry of user-added models: model name -> OpenAI-compatible base URL.
# Filled by add_model(); read by model_init().
model_base_url={}
# Language both AIs are instructed to use for every turn.
LANGUAGE="MOROCCAN Arabic"
# Target dataset repo on the Hugging Face Hub for uploaded conversations.
HF_DATASET="abdeljalilELmajjodi/Mohadata"
# Shared system message prepended to both the questioner's and the
# answerer's conversation (see init_req_messages / init_resp_messages).
SYSTEM_PROMPT = {
    "role": "system",
    "content": f"""This is a context-based Q&A game where two AIs interact with a user-provided context. All interactions MUST be in {LANGUAGE}.

QUESTIONER_AI:
- Must only ask questions that can be answered from the provided context
- Should identify key information gaps or unclear points
- Cannot ask questions about information not present in the context
- Must communicate exclusively in {LANGUAGE}

ANSWERER_AI:
- Must only answer using information explicitly stated in the context
- Cannot add external information or assumptions
- Must indicate if a question cannot be answered from the context alone
- Must communicate exclusively in {LANGUAGE}"""
}
35
+
36
def add_model(model_name,base_url,api_key):
    """Register an OpenAI-compatible endpoint and refresh both model dropdowns.

    The base URL is stored in the module-level ``model_base_url`` registry and
    the API key is stashed in the process environment under the model's name,
    where ``model_init`` later retrieves it.

    Returns a (questioner_dropdown, answerer_dropdown) pair rebuilt with the
    updated model list so Gradio re-renders both selectors.
    """
    model_base_url[model_name]=base_url
    os.environ[model_name]=api_key
    # Rebuild both dropdowns so the newly added model becomes selectable.
    choices=list(model_base_url.keys())
    return (
        gr.Dropdown(label="Questioner Model",choices=choices),
        gr.Dropdown(label="Answerer Model",choices=choices),
    )
42
+
43
+
44
def model_init(model):
    """Build an OpenAI client for a model previously registered via add_model.

    Returns the client, or None (after printing a hint) when the model was
    never registered — i.e. its base URL is missing from ``model_base_url``.
    Narrowed from a bare ``except Exception`` so genuine client-construction
    errors propagate instead of being silently swallowed.
    """
    try:
        api_key=os.environ.get(model)
        base_url=model_base_url[model]
    except KeyError:
        print(f"You should add api key of {model}")
        return None
    return OpenAI(api_key=api_key, base_url=base_url)
52
+
53
+ # generate questions
54
def init_req_messages(sample_context):
    """Seed the questioner's conversation: system prompt + the user context."""
    opening_turn = {
        "role":"user",
        "content":f"""Context for analysis:
{sample_context}
As QUESTIONER_AI, generate a question based on this context.
"""
    }
    return [SYSTEM_PROMPT, opening_turn]
66
+ # generate Answers
67
def init_resp_messages(sample_context,question):
    """Seed the answerer's conversation: system prompt + context + question."""
    opening_turn = {
        "role": "user",
        "content": f"""
Context for analysis:
{sample_context}
Question: {question}
As ANSWERER_AI, answer this question using only information from the context.
"""}
    return [SYSTEM_PROMPT, opening_turn]
81
+
82
def chat_generation(client,model_name,messages):
    """Run a single chat completion and return only the assistant's text."""
    response = client.chat.completions.create(
        model=model_name,
        messages=messages,
        temperature=0.5,
    )
    # Only the first choice's message content is ever used by callers.
    return response.choices[0].message.content
88
+
89
def generate_question(client,model_name,messages_quest):
    """Get the next question from the questioner model.

    Appends the question to ``messages_quest`` in-place as an assistant turn,
    then returns it.
    """
    next_question = chat_generation(client, model_name, messages_quest)
    messages_quest.append({"role": "assistant", "content": next_question})
    return next_question
93
+
94
def generate_answer(client,model_name,messages_answ):
    """Get the next answer from the answerer model.

    Appends the answer to ``messages_answ`` in-place as an assistant turn,
    then returns it.
    """
    reply = chat_generation(client, model_name, messages_answ)
    messages_answ.append({"role": "assistant", "content": reply})
    return reply
98
+
99
def load_upload_ds_hf(df):
    """Append ``df`` to the hosted dataset and push the result to the Hub.

    Downloads the current train split, concatenates the new rows, and pushes
    to the repo named by the module constant HF_DATASET. Blocking network
    work — callers run it off the main thread (see save_conversation).
    """
    dataset_stream=load_dataset("atlasia/Mohadata_Dataset",token=HF_READ,split="train")
    print("[INFO] dataset loaded successfully")
    new_ds=Dataset.from_pandas(df,preserve_index=False)
    updated_ds=concatenate_datasets([dataset_stream,new_ds])
    # Consistency fix: use the HF_DATASET constant (same value) instead of
    # repeating the repo-id literal here.
    updated_ds.push_to_hub(HF_DATASET,token=HF_WRITE)
    print("[INFO] dataset uploaded successfully")
106
+
107
async def load_upload_ds_hf_async(df):
    """Async wrapper: run the blocking Hub upload in a worker thread.

    NOTE(review): appears unused — save_conversation spawns a raw daemon
    Thread instead of awaiting this coroutine; confirm which path is intended.
    """
    await asyncio.to_thread(load_upload_ds_hf,df)
109
+
110
+
111
+
112
def save_conversation(conversation,context,num_rounds):
    """Flatten the chat into data.csv, start a background Hub upload, and
    return the CSV file name for the Gradio download button.

    ``conversation`` is expected to alternate user/assistant turns, giving
    ``num_rounds`` entries per column; a ``context`` column repeats the input
    context so every row is self-contained.
    """
    conv_flat={"user":[],"assistant":[]}
    for turn in conversation:
        conv_flat[turn["role"]].append(turn["content"])
    conv_flat["context"]=[context]*num_rounds
    df=pd.DataFrame(conv_flat)
    # index=False: keep the pandas row index out of the downloadable CSV
    # (the bare to_csv call wrote a spurious unnamed first column).
    df.to_csv("data.csv",index=False)
    print("[INFO] conversation saved successfully")
    print("[INFO] uploading dataset to huggingface")

    # Daemon thread so a slow Hub push never blocks the UI or process exit.
    thread=threading.Thread(target=load_upload_ds_hf,args=(df,))
    thread.daemon=True
    thread.start()

    return Path("data.csv").name
127
+
128
def user_input(context,model_a,model_b,num_rounds,conversation_history):
    """Drive ``num_rounds`` of Q&A between the two selected models.

    Clears and refills ``conversation_history`` (questions as "user" turns,
    answers as "assistant" turns), saves the result to CSV, and returns the
    history plus a visible download button pointing at the saved file.
    """
    conversation_history.clear()
    client_quest=model_init(model_a)
    client_ans=model_init(model_b)
    messages_quest=init_req_messages(context)
    messages_answ=None
    for _ in tqdm(range(num_rounds)):
        question = generate_question(client_quest,model_a,messages_quest)
        conversation_history.append({"role":"user","content":question})
        # First round seeds the answerer's thread with context + question;
        # later rounds just append the new question.
        if messages_answ is None:
            messages_answ=init_resp_messages(context,question)
        else:
            messages_answ.append({"role":"user","content":question})
        answer = generate_answer(client_ans,model_b,messages_answ)
        # Feed the answer back to the questioner so its next question follows on.
        messages_quest.append({"role":"user","content":answer})
        conversation_history.append({"role":"assistant","content":answer})
    file_path=save_conversation(conversation_history,context,num_rounds)
    return conversation_history,gr.DownloadButton(label="Save Conversation",value=file_path,visible=True)