# Hugging Face Spaces status header (page-scrape residue): "Spaces: Running"
import asyncio
import os.path
import tempfile
import uuid
from typing import List

import gradio
import gradio as gr
import openai
import pandas as pd
from autorag.evaluator import Evaluator
from gradio import ChatMessage

from src.data.chunk import chunk
from src.data.parse import parse_pdf
from src.runner import GradioStreamRunner
# Resolve config paths relative to this file so the app works from any CWD.
root_dir = os.path.dirname(os.path.realpath(__file__))
# YAML for the initial "pseudo trial" that indexes the uploaded corpus.
pseudo_trial_yaml_path = os.path.join(root_dir, "config", "init_project_for_pseudo_trial.yaml")
# YAML used to build the runner that serves chat requests (see set_runner).
init_run_yaml = os.path.join(root_dir, "config", "init_project_for_run.yaml")
# Lazily-initialized GradioStreamRunner shared by set_runner / get_response.
gradio_runner = None
# Code for Task 1
def file_ingest(input_files: List[str], temp_project_dir, progress=gr.Progress()):
    """Parse, chunk, and index uploaded PDFs into a pseudo AutoRAG trial.

    Streams human-readable status strings to the status Textbox while
    reporting fractional progress through ``progress``.

    :param input_files: Filepaths of the uploaded files.
    :param temp_project_dir: Project directory the Evaluator writes its trial to.
    :param progress: Gradio progress tracker (injected by Gradio).
    :yield: Status messages for the UI.
    """
    # This function is a generator (it yields below), so guard branches must
    # *yield* their message — a plain ``return "..."`` from a generator only
    # becomes a StopIteration value and never reaches the UI.
    if os.getenv("OPENAI_API_KEY") is None:
        yield "Please submit your OpenAI API key first."
        return
    if not input_files:
        yield "Please upload a file first."
        return
    progress(0.05)
    # do parse
    raw_df = parse_pdf(file_lists=input_files)
    progress(0.3)
    # do chunk
    corpus_df = chunk(raw_df, method="recursivecharacter",
                      lang="en", chunk_size=512, chunk_overlap=128)
    progress(0.5)
    # NOTE: the original called ``asyncio.sleep(0.5)`` without awaiting it in
    # this synchronous function — that only creates a never-run coroutine (and
    # a RuntimeWarning), so it has been removed.
    # Logic for button click
    empty_qa_df = make_empty_qa(corpus_df=corpus_df)
    with tempfile.TemporaryDirectory() as temp_data_dir:
        empty_qa_df.to_parquet(os.path.join(temp_data_dir, "empty_qa.parquet"))
        corpus_df.to_parquet(os.path.join(temp_data_dir, "corpus.parquet"))
        evaluator = Evaluator(qa_data_path=os.path.join(temp_data_dir, "empty_qa.parquet"),
                              corpus_data_path=os.path.join(temp_data_dir, "corpus.parquet"),
                              project_dir=temp_project_dir)
        evaluator.start_trial(pseudo_trial_yaml_path, skip_validation=True)
    yield "Setting up"
    progress(0.9)
    set_runner(temp_project_dir)
    progress(1.0)
    yield "File uploaded complete. You can use it at chatbot now."
def make_empty_qa(corpus_df: pd.DataFrame) -> pd.DataFrame:
    """Build a one-row placeholder QA frame pointing at the first corpus doc.

    :param corpus_df: Corpus frame; only its first ``doc_id`` is used.
    :return: Single-row DataFrame with qid/query/retrieval_gt/generation_gt.
    """
    first_doc_id = corpus_df["doc_id"].iloc[0]
    qa_columns = {
        "qid": str(uuid.uuid4()),
        "query": ["Who is Kai Havertz?"],
        "retrieval_gt": [[[first_doc_id]]],
        "generation_gt": [["Havertz is the greatest footballer."]],
    }
    return pd.DataFrame(qa_columns)
def on_submit_openai_key(openai_key):
    """Store the submitted OpenAI API key and verify it with a tiny request.

    :param openai_key: Key entered in the password Textbox.
    :return: Status string for the API-status Textbox
        ("Setting complete." or "Not Set").
    """
    os.environ["OPENAI_API_KEY"] = openai_key
    # Test openai key
    try:
        client = openai.OpenAI()
        response = client.chat.completions.create(
            messages=[
                {"role": "user", "content": "What is the capital of France?"},
            ],
            model="gpt-4o-mini",
            max_tokens=3,
        )
        assert isinstance(response.choices[0].message.content, str)
        gr.Info("OpenAI API key submitted.", duration=3)
        return "Setting complete."
    except openai.AuthenticationError:
        # BUG FIX: gr.Error is an *exception class* — constructing it without
        # raising displays nothing. gr.Warning shows a toast when called and
        # keeps the "Not Set" return path intact.
        gr.Warning("OpenAI API key is invalid.", duration=3)
        return "Not Set"
    except AssertionError:
        gr.Warning("OpenAI server is not working properly.", duration=3)
        return "Not Set"
def set_runner(project_dir):
    """Build a GradioStreamRunner for *project_dir* and publish it globally."""
    global gradio_runner
    gradio_runner = GradioStreamRunner.from_yaml(
        yaml_path=init_run_yaml, project_dir=project_dir
    )
def get_response(history):
    """Stream the assistant's answer for the latest user turn into *history*.

    Appends an empty assistant turn, then yields the whole message list after
    each streamed delta so the Chatbot component re-renders incrementally.
    """
    global gradio_runner
    if gradio_runner is None:
        gradio.Warning("Please set the AutoRAG server first.")
        return
    if os.getenv("OPENAI_API_KEY", None) is None:
        gradio.Warning("Please submit your OpenAI API key first.")
        return
    # Placeholder assistant turn that the stream below fills in.
    history.append({"role": "assistant", "content": ""})
    latest_user_query = history[-2]["content"]
    # "chunk_output" (not "chunk") so the imported chunk() isn't shadowed.
    for chunk_output in gradio_runner.stream_run(latest_user_query):
        history[-1]["content"] = chunk_output[0]
        yield history
def user(user_message, history: list):
    """Record the submitted text as a user turn and clear the input box.

    :param user_message: Text from the chat Textbox.
    :param history: Current messages-format chat history (not mutated).
    :return: Tuple of ("", new history including the user turn).
    """
    extended_history = [*history, {"role": "user", "content": user_message}]
    return "", extended_history
# interface one
# Top-level Gradio UI: API-key entry + file ingestion on the left column,
# the streaming chatbot on the right.
with gr.Blocks(theme="earneleh/paris") as demo:
    # NOTE(review): this TemporaryDirectory exits (and is deleted) as soon as
    # the Blocks definition finishes — i.e. before demo.launch() runs — so
    # file_ingest may receive an already-removed path via gr.State; confirm.
    with tempfile.TemporaryDirectory() as project_dir:
        # Define components
        with gr.Row():
            with gr.Column(scale=3):
                # OpenAI key entry; on_submit_openai_key validates the key and
                # writes the status into api_key_status_box.
                textbox = gr.Textbox(label="Please input your OpenAI API key and press Enter.", type="password",
                                     info="You can get your API key from https://platform.openai.com/account/api-keys\n"
                                          "AutoRAG do not store your API key.",
                                     autofocus=True)
                api_key_status_box = gr.Textbox(label="OpenAI API status", value="Not Set", interactive=False)

                gr.Markdown("## Ingest Your Data")
                file_input = gr.File(label="Upload Files", type="filepath", file_count="multiple")
                button = gr.Button("Submit file")
                text_output = gr.Textbox(label="Status update", interactive=False)

                # Define layout and interactions
                textbox.submit(on_submit_openai_key, inputs=[textbox], outputs=api_key_status_box)
                # gr.State(project_dir) passes the temp project path to file_ingest.
                button.click(file_ingest, inputs=[file_input, gr.State(project_dir)], outputs=[text_output])
            with gr.Column(scale=7):
                gr.Markdown("## This is your Naive RAG Chatbot 🚀")
                chatbot = gr.Chatbot(type="messages", height=600)
                chat_input = gr.Textbox()
                clear = gr.Button(value="Clear Chat🗑️")

                # Submitting chat text first records the user turn (user),
                # then streams the assistant reply into the Chatbot (get_response).
                chat_input.submit(user, [chat_input, chatbot], outputs=[chat_input, chatbot], queue=False).then(
                    get_response, inputs=chatbot, outputs=[chatbot]
                )
                clear.click(lambda: None, None, chatbot, queue=False)

                gr.Markdown("## Do you like the result?\n\nIf you don't like it, try to optimize it with AutoRAG. Press below button and go to make evaluation data and optimize it. Both on the Huggingface space so you don't need to install anything.")
                # Links out to the companion Hugging Face Spaces.
                with gr.Row():
                    open_data_creation = gr.Button(value="1️⃣ : Data Creation",
                                                   link="https://huggingface.co/spaces/AutoRAG/AutoRAG-data-creation")
                    open_optimize = gr.Button(value="2️⃣ : Optimize", link="https://huggingface.co/spaces/AutoRAG/RAG-Pipeline-Optimization")
if __name__ == "__main__":
    # Guarded launch: Hugging Face Spaces executes this file directly, so the
    # guard does not prevent startup there, but importing this module no
    # longer blocks on a running server. (Dead commented-out launch removed.)
    demo.launch(share=False, debug=False)