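"""RAG chatbot demo built with Gradio and Pixeltable.

Pipeline sketch: upload PDFs -> split them into chunks -> embed and index the
chunks -> for each question, retrieve the top-5 most similar chunks, build a
grounded prompt, and answer with OpenAI's gpt-4o-mini.
"""
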
import gradio as gr
import numpy as np
import pixeltable as pxt
from pixeltable.iterators import DocumentSplitter
from pixeltable.functions.huggingface import sentence_transformer
from pixeltable.functions import openai
from huggingface_hub import HfApi

import os
import getpass

# Prompt for the OpenAI API key if it is not already set in the environment
if 'OPENAI_API_KEY' not in os.environ:
    os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API key:')

# Set up the embedding function: e5-large-v2 (sentence-transformers), wrapped
# as a Pixeltable expression UDF so it can back the embedding index below
@pxt.expr_udf
def e5_embed(text: str) -> np.ndarray:
    return sentence_transformer(text, model_id='intfloat/e5-large-v2')
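# Note: e5 models are trained with "query:"/"passage:" prefixes; this demo
# embeds raw text, which works but may retrieve slightly less accurately.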

# Create prompt function
@pxt.udf
def create_prompt(top_k_list: list[dict], question: str) -> str:
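    # top_k returns passages most-similar-first; reversing puts the best
    # match closest to the question in the prompt.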
    concat_top_k = '\n\n'.join(
        elt['text'] for elt in reversed(top_k_list)
    )
    return f'''
    PASSAGES:
    {concat_top_k}
    QUESTION:
    {question}'''

# UDF for assembling the chat message list
@pxt.udf
def create_messages(prompt: str) -> list[dict]:
    """Creates a structured message list for the LLM from the prompt"""
    return [
        {
            'role': 'system',
            'content': 'Answer questions using only the provided context. If the context lacks sufficient information, state this clearly.'
        },
        {
            'role': 'user',
            'content': prompt
        }
    ]

def validate_token(token):
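    """Return True if the token is accepted by the Hugging Face Hub (via HfApi.whoami)."""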
    try:
        api = HfApi()
        user_info = api.whoami(token=token)
        return user_info is not None
    except Exception:
        return False

def process_files(token, pdf_files, chunk_limit, chunk_separator):
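    """Ingest PDFs into Pixeltable: chunk, embed, index, and wire up the RAG pipeline as computed columns."""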
    if not validate_token(token):
        return "Invalid token. Please enter a valid Hugging Face token."
    
    # Reset Pixeltable state: drop any previous demo directory, then recreate it
    pxt.drop_dir('chatbot_demo', force=True)
    pxt.create_dir('chatbot_demo')

    # Create a table to store the uploaded PDF documents
    t = pxt.create_table(
        'chatbot_demo.documents',
        {
            'document': pxt.DocumentType(nullable=True),
            'question': pxt.StringType(nullable=True)
        }
    )

    # Insert the uploaded files into the documents table (non-PDF files are skipped)
    t.insert({'document': file.name} for file in pdf_files if file.name.endswith('.pdf'))

    # Create a view that splits the documents into smaller chunks
    chunks_t = pxt.create_view(
        'chatbot_demo.chunks',
        t,
        iterator=DocumentSplitter.create(
            document=t.document,
            separators=chunk_separator,
            limit=chunk_limit if chunk_separator in ["token_limit", "char_limit"] else None,
            metadata='title,heading,sourceline'
        )
    )
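
    # The chunks view is maintained incrementally: documents inserted later are
    # split automatically without reprocessing existing rows.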

    # Add an embedding index to the chunks for similarity search
    chunks_t.add_embedding_index('text', string_embed=e5_embed)
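    # The index keeps chunk embeddings in sync with the view and enables the
    # .similarity() search used in the retrieval query below.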

    @chunks_t.query
    def top_k(query_text: str):
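        # Score each chunk by embedding similarity to the question, keep the 5 best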
        sim = chunks_t.text.similarity(query_text)
        return (
            chunks_t.order_by(sim, asc=False)
                .select(chunks_t.text, sim=sim)
                .limit(5)
        )

    # Add computed columns to create the chain of transformations
    t['question_context'] = chunks_t.queries.top_k(t.question)
    t['prompt'] = create_prompt(t.question_context, t.question)
    t['messages'] = create_messages(t.prompt)  # structured chat messages for the LLM
    
    # Add the response column using the messages computed column
    t['response'] = openai.chat_completions(
        model='gpt-4o-mini-2024-07-18',
        messages=t.messages,  # Use the computed messages column
        max_tokens=300,
        top_p=0.9,
        temperature=0.7
    )
    t['gpt4omini'] = t.response.choices[0].message.content
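
    # Together these computed columns form a declarative RAG pipeline: each
    # inserted question triggers retrieval -> prompt -> messages -> LLM call,
    # and the answer lands in `gpt4omini` automatically.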

    return "Files processed successfully. You can start the discussion."

def get_answer(token, msg):
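    """Insert the question and read back the answer produced by the computed-column pipeline."""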
    if not validate_token(token):
        return "Invalid token. Please enter a valid Hugging Face token."

    t = pxt.get_table('chatbot_demo.documents')
            
    # Insert the question into the table
    t.insert([{'question': msg}])
    
    # The answer will be automatically generated through the chain of computed columns
    answer = t.select(t.gpt4omini).where(t.question == msg).collect()['gpt4omini'][0]
    return answer

def respond(token, message, chat_history):
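    # Gradio callback: returning "" clears the input box; the appended
    # (message, answer) pair re-renders the chat history.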
    bot_message = get_answer(token, message)
    chat_history.append((message, bot_message))
    return "", chat_history

# Gradio interface
with gr.Blocks(theme=gr.themes.Base()) as demo:
    gr.Markdown(
        """
        <div>
            <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/source/data/pixeltable-logo-large.png" alt="Pixeltable" style="max-width: 200px; margin-bottom: 20px;" />
            <h1 style="margin-bottom: 0.5em;">AI Chatbot With Retrieval-Augmented Generation (RAG)</h1>
        </div>
        """
    )
    gr.HTML(
        """
        <p>
            <a href="https://github.com/pixeltable/pixeltable" target="_blank" style="color: #F25022; text-decoration: none; font-weight: bold;">Pixeltable</a> is a declarative interface for working with text, images, embeddings, and even video, enabling you to store, transform, index, and iterate on data.
        </p>
        
        <div style="background-color: #E5DDD4; border: 1px solid #e9ecef; color: #000000; border-radius: 8px; padding: 15px; margin-bottom: 20px;">
            <strong style="color: #000000">Disclaimer:</strong> This app is best run on your own hardware with a GPU for optimal performance. This Hugging Face Space uses the free tier (2 vCPU, 16 GB RAM), which results in slower processing times. If you wish to use this app with your own hardware for improved performance, you can <a href="https://huggingface.co/spaces/Pixeltable/AI-Chatbot-With-Retrieval-Augmented-Generation?duplicate=true" target="_blank" style="color: #4D148C; text-decoration: none; font-weight: bold;">duplicate this Hugging Face Space</a>, run it locally, or use Google Colab with its free (limited) GPU support.
        </div>
        """
    )

    with gr.Row():
        with gr.Column():
            with gr.Accordion("What This Demo Does", open=True):
                gr.Markdown("""
                - Upload multiple PDF documents.
                - Process and index the content of these documents.
                - Ask questions about the content and receive AI-generated answers grounded in the uploaded material.
                """)
        with gr.Column():
            with gr.Accordion("How does it work?", open=True):
                gr.Markdown("""
                - When a user asks a question, the system searches for the most relevant chunks of text from the uploaded documents.
                - It then uses these relevant chunks as context for a large language model (LLM) to generate an answer.
                - The LLM formulates a response based on the provided context and the user's question.
                """)

    user_token = gr.Textbox(label="Enter your Hugging Face Token", type="password")
          
    with gr.Row():
        with gr.Column(scale=1):
            pdf_files = gr.File(label="Upload PDF Documents", file_count="multiple")
            chunk_limit = gr.Slider(minimum=100, maximum=500, value=300, step=5, label="Chunk Size Limit")
            chunk_separator = gr.Dropdown(
                choices=["token_limit", "char_limit", "sentence", "paragraph", "heading"],
                value="token_limit",
                label="Chunk Separator"
            )
            process_button = gr.Button("Process Files")
            process_output = gr.Textbox(label="Processing Output")

        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Chat History")
            msg = gr.Textbox(label="Your Question", placeholder="Ask a question about the uploaded documents")
            submit = gr.Button("Submit")

    process_button.click(process_files, inputs=[user_token, pdf_files, chunk_limit, chunk_separator], outputs=[process_output])
    submit.click(respond, inputs=[user_token, msg, chatbot], outputs=[msg, chatbot])

if __name__ == "__main__":
    demo.launch()