"""Context-based Q&A game between two OpenAI-compatible models.

A questioner model and an answerer model take turns over a user-provided
context; finished conversations are appended to a Hugging Face dataset.
"""

import asyncio
import os
import threading
from pathlib import Path

import gradio as gr
import pandas as pd
from datasets import Dataset, concatenate_datasets, load_dataset
from dotenv import load_dotenv
from openai import OpenAI
from tqdm import tqdm

# Load Hugging Face tokens from the environment (a local .env file is supported).
load_dotenv()
HF_READ = os.environ["HF_READ"]
HF_WRITE = os.environ["HF_WRITE"]
print("[INFO] HF tokens loaded")

# Maps a model name to the base URL of its OpenAI-compatible endpoint.
model_base_url = {}
LANGUAGE = "MOROCCAN Arabic"
HF_DATASET = "atlasia/Mohadata_Dataset"
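
# Expected .env contents (illustrative placeholder values, not real tokens):
# HF_READ=hf_xxxxxxxxxxxxxxxx
# HF_WRITE=hf_xxxxxxxxxxxxxxxx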

SYSTEM_PROMPT = {
    "role": "system",
    "content": f"""This is a context-based Q&A game where two AIs interact with a user-provided context. All interactions MUST be in {LANGUAGE}.

QUESTIONER_AI:
- Must only ask questions that can be answered from the provided context
- Should identify key information gaps or unclear points
- Cannot ask questions about information not present in the context
- Must communicate exclusively in {LANGUAGE}

ANSWERER_AI:
- Must only answer using information explicitly stated in the context
- Cannot add external information or assumptions
- Must indicate if a question cannot be answered from the context alone
- Must communicate exclusively in {LANGUAGE}""",
}


def add_model(model_name, base_url, api_key):
    """Register a model endpoint and refresh both model dropdowns."""
    model_base_url[model_name] = base_url
    # The API key is stashed in the process environment under the model name
    # so that model_init can retrieve it later.
    os.environ[model_name] = api_key
    choices = list(model_base_url.keys())
    return (
        gr.Dropdown(label="Questioner Model", choices=choices),
        gr.Dropdown(label="Answerer Model", choices=choices),
    )
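
# Illustrative usage (placeholder values, not from the original script):
# add_model("my-model", "https://api.example.com/v1", "sk-...")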


def model_init(model):
    """Create an OpenAI client for a previously registered model."""
    try:
        api_key = os.environ[model]
        base_url = model_base_url[model]
        return OpenAI(api_key=api_key, base_url=base_url)
    except KeyError:
        # Surface the problem in the UI instead of silently returning None.
        raise gr.Error(f"No API key or base URL registered for {model}. Add the model first.")


def init_req_messages(sample_context):
    """Build the initial message list for the questioner model."""
    messages_quest = [
        SYSTEM_PROMPT,
        {
            "role": "user",
            "content": f"""Context for analysis:
{sample_context}
As QUESTIONER_AI, generate a question based on this context.
""",
        },
    ]
    return messages_quest


def init_resp_messages(sample_context, question):
    """Build the initial message list for the answerer model."""
    messages_answ = [
        SYSTEM_PROMPT,
        {
            "role": "user",
            "content": f"""Context for analysis:
{sample_context}
Question: {question}
As ANSWERER_AI, answer this question using only information from the context.
""",
        },
    ]
    return messages_answ


def chat_generation(client, model_name, messages):
    """Send the conversation to the model and return its reply text."""
    return client.chat.completions.create(
        model=model_name,
        messages=messages,
        temperature=0.5,
    ).choices[0].message.content


def generate_question(client, model_name, messages_quest):
    """Ask the questioner model for the next question and record it."""
    question = chat_generation(client, model_name, messages_quest)
    messages_quest.append({"role": "assistant", "content": question})
    return question


def generate_answer(client, model_name, messages_answ):
    """Ask the answerer model for a reply and record it."""
    answer = chat_generation(client, model_name, messages_answ)
    messages_answ.append({"role": "assistant", "content": answer})
    return answer


def load_upload_ds_hf(df):
    """Append the new conversations to the existing HF dataset and push."""
    dataset_stream = load_dataset(HF_DATASET, token=HF_READ, split="train")
    print("[INFO] dataset loaded successfully")
    new_ds = Dataset.from_pandas(df, preserve_index=False)
    updated_ds = concatenate_datasets([dataset_stream, new_ds])
    updated_ds.push_to_hub(HF_DATASET, token=HF_WRITE)
    print("[INFO] dataset uploaded successfully")


async def load_upload_ds_hf_async(df):
    # Async alternative to the background thread used in save_conversation.
    await asyncio.to_thread(load_upload_ds_hf, df)


def save_conversation(conversation, context, num_rounds):
    """Flatten the conversation into a CSV and push it to the Hub in the background."""
    conv_flat = {"user": [], "assistant": []}
    for message in conversation:
        conv_flat[message["role"]].append(message["content"])
    conv_flat["context"] = [context] * num_rounds
    df = pd.DataFrame(conv_flat)
    df.to_csv("data.csv", index=False)
    print("[INFO] conversation saved successfully")
    print("[INFO] uploading dataset to huggingface")

    # Upload in a daemon thread so the UI returns immediately.
    thread = threading.Thread(target=load_upload_ds_hf, args=(df,))
    thread.daemon = True
    thread.start()

    return Path("data.csv").name


def user_input(context, model_a, model_b, num_rounds, conversation_history):
    """Run num_rounds of question/answer between the two models over the context."""
    conversation_history.clear()
    client_quest = model_init(model_a)
    client_ans = model_init(model_b)
    messages_quest = init_req_messages(context)
    for round_num in tqdm(range(num_rounds)):
        question = generate_question(client_quest, model_a, messages_quest)
        conversation_history.append({"role": "user", "content": question})
        if round_num == 0:
            messages_answ = init_resp_messages(context, question)
        else:
            messages_answ.append({"role": "user", "content": question})
        answer = generate_answer(client_ans, model_b, messages_answ)
        messages_quest.append({"role": "user", "content": answer})
        conversation_history.append({"role": "assistant", "content": answer})
    file_path = save_conversation(conversation_history, context, num_rounds)
    return conversation_history, gr.DownloadButton(
        label="Save Conversation", value=file_path, visible=True
    )
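

# --- Illustrative UI wiring (an assumption; the original section does not include it) ---
# A minimal Gradio Blocks sketch showing how the functions above could be tied
# together. Component names, layout, and launch options are placeholders.
if __name__ == "__main__":
    with gr.Blocks() as demo:
        with gr.Row():
            model_name = gr.Textbox(label="Model Name")
            base_url = gr.Textbox(label="Base URL")
            api_key = gr.Textbox(label="API Key", type="password")
        add_btn = gr.Button("Add Model")
        with gr.Row():
            model_a = gr.Dropdown(label="Questioner Model", choices=[])
            model_b = gr.Dropdown(label="Answerer Model", choices=[])
        context = gr.Textbox(label="Context", lines=8)
        num_rounds = gr.Number(label="Rounds", value=3, precision=0)
        chatbot = gr.Chatbot(label="Conversation", type="messages")
        run_btn = gr.Button("Run")
        download_btn = gr.DownloadButton(label="Save Conversation", visible=False)

        # add_model returns updated choices for both dropdowns.
        add_btn.click(add_model, [model_name, base_url, api_key], [model_a, model_b])
        # user_input receives the chatbot's message list as conversation_history.
        run_btn.click(
            user_input,
            [context, model_a, model_b, num_rounds, chatbot],
            [chatbot, download_btn],
        )

    demo.launch()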