Spaces:

towardsai-tutors
/

buster

Running

App Files Community

jerpint committed on Oct 7, 2023

Commit

e0e448c

unverified ·

1 Parent(s): fc1544a

support multiple sources (#10)

Browse files

* support multiple sources

* start app with all sources enabled

Files changed (3) hide show

cfg.py +2 -1
gradio_app.py +11 -5
requirements.txt +1 -1

cfg.py CHANGED Viewed

@@ -21,7 +21,6 @@ PASSWORD = os.getenv("BUSTER_PASSWORD")
 HUB_TOKEN = os.getenv("HUB_TOKEN")
 REPO_ID = os.getenv("HF_DATASET")
-# HUB_DB_FILE = "deeplake_store.zip"
 DEEPLAKE_DATASET = os.getenv("DEEPLAKE_DATASET", "wiki_tai_langchain")
 ZIP_FILE = DEEPLAKE_DATASET + ".zip"
@@ -79,6 +78,8 @@ A user will now submit a question. Respond 'true' if it is valid, respond 'false
         "thresh": 0.7,
         "max_tokens": 2000,
         "embedding_model": "text-embedding-ada-002",
     },
     documents_answerer_cfg={
         "no_documents_message": "No blog posts are available for this question.",

 HUB_TOKEN = os.getenv("HUB_TOKEN")
 REPO_ID = os.getenv("HF_DATASET")
 DEEPLAKE_DATASET = os.getenv("DEEPLAKE_DATASET", "wiki_tai_langchain")
 ZIP_FILE = DEEPLAKE_DATASET + ".zip"
         "thresh": 0.7,
         "max_tokens": 2000,
         "embedding_model": "text-embedding-ada-002",
+        "exec_option": "compute_engine",
+        "use_tql": True,
     },
     documents_answerer_cfg={
         "no_documents_message": "No blog posts are available for this question.",

gradio_app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import logging
 import os
 import gradio as gr
 import pandas as pd
@@ -15,6 +16,7 @@ logging.getLogger("httpx").setLevel(logging.WARNING)
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
 def check_auth(username: str, password: str) -> bool:
     valid_user = username == cfg.USERNAME
@@ -40,7 +42,7 @@ def format_sources(matched_documents: pd.DataFrame) -> str:
         "similarity_to_answer", ascending=False
     ).drop_duplicates("title", keep="first")
-    documents = "\n\n".join(
         [
             document_template.format(document=document)
             for _, document in matched_documents.iterrows()
@@ -64,10 +66,10 @@ def user(user_input, history):
     return "", history + [[user_input, None]]
-def chat(history):
     user_input = history[-1][0]
-    completion = buster.process_input(user_input)
     history[-1][1] = ""
@@ -85,6 +87,10 @@ with block:
             "<h3><center>Buster 🤖: A Question-Answering Bot for your documentation</center></h3>"
         )
     chatbot = gr.Chatbot()
     with gr.Row():
@@ -107,10 +113,10 @@ with block:
     response = gr.State()
     submit.click(user, [question, chatbot], [question, chatbot], queue=False).then(
-        chat, inputs=[chatbot], outputs=[chatbot, response]
     ).then(add_sources, inputs=[chatbot, response], outputs=[chatbot])
     question.submit(user, [question, chatbot], [question, chatbot], queue=False).then(
-        chat, inputs=[chatbot], outputs=[chatbot, response]
     ).then(add_sources, inputs=[chatbot, response], outputs=[chatbot])

 import logging
 import os
+from typing import Optional
 import gradio as gr
 import pandas as pd
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
+AVAILABLE_SOURCES = ["towardsai", "wikipedia", "langchain_course"]
 def check_auth(username: str, password: str) -> bool:
     valid_user = username == cfg.USERNAME
         "similarity_to_answer", ascending=False
     ).drop_duplicates("title", keep="first")
+    documents = "\n".join(
         [
             document_template.format(document=document)
             for _, document in matched_documents.iterrows()
     return "", history + [[user_input, None]]
+def get_answer(history, sources: Optional[list[str]] = None):
     user_input = history[-1][0]
+    completion = buster.process_input(user_input, sources=sources)
     history[-1][1] = ""
             "<h3><center>Buster 🤖: A Question-Answering Bot for your documentation</center></h3>"
         )
+    source_selection = gr.CheckboxGroup(
+        choices=AVAILABLE_SOURCES, label="Select Sources", value=AVAILABLE_SOURCES
+    )
     chatbot = gr.Chatbot()
     with gr.Row():
     response = gr.State()
     submit.click(user, [question, chatbot], [question, chatbot], queue=False).then(
+        get_answer, inputs=[chatbot, source_selection], outputs=[chatbot, response]
     ).then(add_sources, inputs=[chatbot, response], outputs=[chatbot])
     question.submit(user, [question, chatbot], [question, chatbot], queue=False).then(
+        get_answer, inputs=[chatbot, source_selection], outputs=[chatbot, response]
     ).then(add_sources, inputs=[chatbot, response], outputs=[chatbot])

requirements.txt CHANGED Viewed

@@ -1,3 +1,3 @@
-buster-doctalk==1.0.19
 gradio
 deeplake

+git+https://github.com/jerpint/buster@multiple-sources
 gradio
 deeplake