# bookmarkschat / app.py
# Last commit 801818c (markredito): added gr.Accordion to hide info text
import gradio as gr
import faiss
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from langchain_anthropic import ChatAnthropic
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings
from gradio import Markdown
import os
from dotenv import load_dotenv
# Paths to the prebuilt FAISS index and the CSV holding the text segments.
INDEX_PATH = "index/index_file.index"
CSV_PATH = "csv/processed_markdown_data.csv"

# Load FAISS index
index = faiss.read_index(INDEX_PATH)

# Load document store (CSV); the 'Segment' column supplies the text of each document.
df = pd.read_csv(CSV_PATH)
all_segments = df['Segment'].tolist()

# Row i of the CSV is mapped to vector i of the index further down, so the two
# must have the same length. Raise explicitly instead of using `assert`, which
# is stripped when Python runs with -O and would let a mismatch go unnoticed.
if len(all_segments) != index.ntotal:
    raise ValueError(
        "Mismatch between number of segments and vectors in the index "
        f"({len(all_segments)} segments vs {index.ntotal} vectors)"
    )

# Set up HuggingFace embeddings (used to embed incoming queries).
# NOTE(review): presumably the same model that produced the vectors stored in
# the index file - confirm against the indexing script.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Wrap every segment in a Document whose "source" metadata is its docstore id.
documents = [
    Document(page_content=segment, metadata={"source": f"doc_{i}"})
    for i, segment in enumerate(all_segments)
]

# In-memory docstore keyed by "doc_<i>", where i is the segment's row position.
docstore = InMemoryDocstore({f"doc_{i}": doc for i, doc in enumerate(documents)})

# Tie the raw FAISS index to the docstore: vector position i -> "doc_<i>".
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=docstore,
    index_to_docstore_id={i: f"doc_{i}" for i in range(index.ntotal)},
)
# Api key: load_dotenv() runs first, then the key is looked up in the
# process environment (None when the variable is not set).
load_dotenv()
api_key = os.environ.get("ANTHROPIC_API_KEY")

# Anthropic API setup (Claude 3 Haiku), shared by the retriever and the
# answer-generation chain.
llm = ChatAnthropic(
    model="claude-3-haiku-20240307",
    api_key=api_key,
    max_tokens_to_sample=1024,
    temperature=0.2,
)

# Multi-query retriever: wraps an MMR search over the vector store (k=6)
# and is given the same LLM instance.
multi_query_retriever = MultiQueryRetriever.from_llm(
    llm=llm,
    retriever=vector_store.as_retriever(
        search_type="mmr",
        search_kwargs={"k": 6},
    ),
)
# Prompt and retrieval chain setup.
# The system message ends with the {context} placeholder; the user's question
# arrives through the {input} placeholder of the human message.
system_prompt = """You are an assistant with access to my notes. The notes are about different topics that are interesting to me.
Your task is to provide insights on the content I've saved in the past. You will be comprehensive and informative in your response.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, say that you don't know.
{context}"""

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

# Chain that answers from the supplied documents, then the full pipeline that
# retrieves with the multi-query retriever before answering.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
rag_chain = create_retrieval_chain(
    retriever=multi_query_retriever,
    combine_docs_chain=question_answer_chain,
)
# Gradio interface
# Markdown text describing the chatbot to visitors: what data it covers, example
# prompts, and a short explanation of the RAG pipeline. It is passed to
# gr.Markdown in the UI definition further down this file.
info_text = """
by [Mark Redito](https://markredito.com)
This chatbot has access to my browser bookmarks from 2020 to mid-2024. It covers a variety of topics I’m interested in, including Art, Technology, and Culture.
## You can use it in a few ways:
- Extract specific links. For example: "Give me the links about Ethereum"
- Get summaries of bookmarked content. Try: "Summarize 'How to do great work' by Paul Graham"
- Ask general questions on various topics. Like: "What goes into a typical music recording contract?"
## Here's a quick rundown of how it works behind the scenes:
- The system uses RAG (Retrieval-Augmented Generation) with a framework called Langchain. Basically, it helps the chatbot find and use relevant information.
- The bookmarks are stored in a database called FAISS that makes searching super fast.
- The brains of the operation is Claude 3 Haiku, a small and fast AI model by Anthropic.
- When you ask a question, the system comes up with a few more related questions to help find the right links. It then searches the database and passes the best information to Claude to craft your answer.
Keep in mind, if the chatbot can't find good information to answer your question, it'll let you know by saying something like "I don't know" or "I can't find it." And like any AI, it might make mistakes sometimes.
This is mostly a fun project I put together for my own curiosity and enjoyment. While I can't make any promises about its performance, I hope you have fun exploring and maybe discover something interesting! Enjoy!
"""
# The respond function
def respond(message, history, max_tokens, temperature, top_p):
    """Answer a chat message by running it through the RAG chain.

    Args:
        message: The user's latest message; passed to the chain as ``input``.
        history: Earlier chat turns supplied by the UI. Not used here, so
            every message is answered on its own.
        max_tokens: Value of the "Max new tokens" control. Accepted to match
            the UI callback signature; not used by this function.
        temperature: Value of the "Temperature" control; not used here.
        top_p: Value of the "Top-p" control; not used here.

    Returns:
        The chain's ``answer`` entry when the chain returns a dict containing
        one, otherwise ``str()`` of whatever the chain returned. A blank
        message yields a short prompt asking for a question.
    """
    # Nothing to look up: skip the retrieval + LLM round trip for empty,
    # whitespace-only, or missing messages.
    if message is None or not str(message).strip():
        return "Please enter a question."

    # Process user message through RAG chain
    result = rag_chain.invoke({"input": message})

    # Expected shape is a dict carrying the generated text under 'answer'.
    if isinstance(result, dict) and 'answer' in result:
        return result['answer']

    # Fallback: stringify anything that is not in the expected format.
    return str(result)
def chat_response(message, history, max_tokens, temperature, top_p):
    """Chat callback for the UI: delegate to ``respond`` and return its reply.

    All five arguments are forwarded unchanged, in the same order.
    """
    return respond(message, history, max_tokens, temperature, top_p)
# Gradio UI: header, two collapsible panels, and the chat interface.
with gr.Blocks(fill_height=True) as demo:
    # Page title
    gr.Markdown("# Welcome to My Bookmarks Chatbot")

    # Info text stays collapsed until the visitor opens it.
    with gr.Accordion("Click to view info about the chatbot", open=False):
        gr.Markdown(info_text)

    # Controls whose values are handed to chat_response as extra arguments.
    with gr.Accordion("Advanced Options", open=False):
        max_tokens_slider = gr.Slider(
            label="Max new tokens",
            minimum=1,
            maximum=2048,
            step=1,
            value=512,
        )
        temperature_slider = gr.Slider(
            label="Temperature",
            minimum=0.1,
            maximum=4.0,
            step=0.1,
            value=0.7,
        )
        top_p_slider = gr.Slider(
            label="Top-p (nucleus sampling)",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.95,
        )

    # Chat panel. Each example row is the question followed by one value per
    # additional input, in the order the inputs are listed.
    # NOTE(review): retry_btn / undo_btn / clear_btn are version-dependent
    # ChatInterface arguments - confirm against the installed Gradio release.
    gr.ChatInterface(
        fn=chat_response,
        additional_inputs=[max_tokens_slider, temperature_slider, top_p_slider],
        examples=[
            [question, 512, 0.7, 0.95]
            for question in (
                "How to do great work by Paul Graham?",
                "Give me the links about Ethereum from the notes",
                "What goes into a typical music recording contract?",
            )
        ],
        retry_btn="Retry",
        undo_btn="Undo",
        clear_btn="Clear",
    )

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()