import gradio as gr
import faiss
import pandas as pd
from langchain_anthropic import ChatAnthropic
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings
import os
from dotenv import load_dotenv

# Paths
INDEX_PATH = "index/index_file.index"
CSV_PATH = "csv/processed_markdown_data.csv"

# Load the prebuilt FAISS index
index = faiss.read_index(INDEX_PATH)

# Load the document store (CSV of markdown segments)
df = pd.read_csv(CSV_PATH)
all_segments = df['Segment'].tolist()

# Ensure the number of segments matches the number of vectors in the index
assert len(all_segments) == index.ntotal, "Mismatch between number of segments and vectors in the index"

# Set up HuggingFace embeddings (must be the same model the index was built with)
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Wrap each segment in a Document so vector i can be mapped back to its text
documents = [
    Document(page_content=segment, metadata={"source": f"doc_{i}"})
    for i, segment in enumerate(all_segments)
]
docstore = InMemoryDocstore({f"doc_{i}": doc for i, doc in enumerate(documents)})
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=docstore,
    index_to_docstore_id={i: f"doc_{i}" for i in range(index.ntotal)},
)

# API key
load_dotenv()
api_key = os.getenv("ANTHROPIC_API_KEY")

# Anthropic API setup (Claude 3 Haiku)
llm = ChatAnthropic(
    api_key=api_key,
    model="claude-3-haiku-20240307",
    temperature=0.2,
    max_tokens=1024,
)

# Multi-query retriever: the LLM generates variations of the user's question,
# and MMR retrieval fetches up to 6 diverse matches per query
multi_query_retriever = MultiQueryRetriever.from_llm(
    retriever=vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 6}),
    llm=llm,
)

# Prompt and retrieval chain setup
system_prompt = """You are an assistant with access to my notes. The notes are about different topics that interest me. Your task is to provide insights on the content I've saved in the past. Be comprehensive and informative in your response. Use the following pieces of retrieved context to answer the question. If you don't know the answer, say that you don't know.

{context}"""

prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
])

question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(multi_query_retriever, question_answer_chain)

# Gradio interface
info_text = """
by [Mark Redito](https://markredito.com)

This chatbot has access to my browser bookmarks from 2020 to mid-2024. It covers a variety of topics I’m interested in, including Art, Technology, and Culture.

## You can use it in a few ways:

- Extract specific links. For example: "Give me the links about Ethereum"
- Get summaries of bookmarked content. Try: "Summarize 'How to do great work' by Paul Graham"
- Ask general questions on various topics. Like: "What goes into a typical music recording contract?"

## Here's a quick rundown of how it works behind the scenes:

- The system uses RAG (Retrieval-Augmented Generation) with a framework called LangChain. Basically, it helps the chatbot find and use relevant information.
- The bookmarks are stored in a FAISS vector index, which makes similarity search super fast.
- The brains of the operation is Claude 3 Haiku, a small and fast AI model by Anthropic.
- When you ask a question, the system comes up with a few more related questions to help find the right links. It then searches the index and passes the best information to Claude to craft your answer.

Keep in mind, if the chatbot can't find good information to answer your question, it'll let you know by saying something like "I don't know" or "I can't find it." And like any AI, it might make mistakes sometimes.

This is mostly a fun project I put together for my own curiosity and enjoyment. While I can't make any promises about its performance, I hope you have fun exploring and maybe discover something interesting!

Enjoy!
"""


# Run the user's message through the RAG chain and extract the answer
def respond(message, history, max_tokens, temperature, top_p):
    # Note: the slider values (max_tokens, temperature, top_p) are accepted to
    # match the ChatInterface signature, but they are not wired into the chain;
    # the fixed ChatAnthropic settings above apply.
    response = rag_chain.invoke({"input": message})

    # create_retrieval_chain returns a dict with an 'answer' key
    if isinstance(response, dict) and 'answer' in response:
        answer = response['answer']
    else:
        answer = str(response)  # Fall back to a string if the shape is unexpected

    return answer


def chat_response(message, history, max_tokens, temperature, top_p):
    bot_message = respond(message, history, max_tokens, temperature, top_p)
    return bot_message


# Gradio UI
with gr.Blocks(fill_height=True) as demo:
    # Main header
    gr.Markdown("# Welcome to My Bookmarks Chatbot")

    # Collapsible info section
    with gr.Accordion("Click to view info about the chatbot", open=False):
        gr.Markdown(info_text)

    # Generation settings exposed in the UI (see the note in respond())
    with gr.Accordion("Advanced Options", open=False):
        max_tokens_slider = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
        temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
        top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")

    # Chatbot interface with additional options
    gr.ChatInterface(
        fn=chat_response,
        additional_inputs=[max_tokens_slider, temperature_slider, top_p_slider],
        examples=[
            ["How to do great work by Paul Graham?", 512, 0.7, 0.95],
            ["Give me the links about Ethereum from the notes", 512, 0.7, 0.95],
            ["What goes into a typical music recording contract?", 512, 0.7, 0.95],
        ],
        retry_btn="Retry",
        undo_btn="Undo",
        clear_btn="Clear",
    )

if __name__ == "__main__":
    demo.launch()
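

# ---------------------------------------------------------------------------
# For reference: a minimal sketch of how the files at INDEX_PATH and CSV_PATH
# could be produced. The actual build happens in a separate notebook that is
# not part of this app, so treat this hypothetical helper as an illustration
# of the expected layout (row i of the CSV <-> vector i of the index), not as
# the exact build script.
# ---------------------------------------------------------------------------
def build_index_sketch(segments, index_path=INDEX_PATH, csv_path=CSV_PATH):
    """Hypothetical helper: embed segments and persist the FAISS index + CSV."""
    import numpy as np

    # Embed with the same MiniLM model the app queries with at runtime
    vectors = np.asarray(embeddings.embed_documents(segments), dtype="float32")

    # A flat L2 index matches the default distance strategy of LangChain's
    # FAISS wrapper; all-MiniLM-L6-v2 produces 384-dimensional vectors
    flat_index = faiss.IndexFlatL2(vectors.shape[1])
    flat_index.add(vectors)
    faiss.write_index(flat_index, index_path)

    # Persist the segments in the same order as the vectors, since the app
    # maps vector i -> f"doc_{i}" -> row i at load time
    pd.DataFrame({"Segment": segments}).to_csv(csv_path, index=False)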
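

# ---------------------------------------------------------------------------
# For reference: the chain can also be exercised without the Gradio UI, e.g.
# from a REPL (assumes ANTHROPIC_API_KEY is set and the index files exist).
# The dict returned by create_retrieval_chain carries "input", "context" (the
# retrieved Documents), and "answer". `smoke_test` is an illustrative name,
# not part of the app itself.
# ---------------------------------------------------------------------------
def smoke_test(question="Give me the links about Ethereum"):
    """Hypothetical helper: run one question through the RAG chain."""
    result = rag_chain.invoke({"input": question})
    print(result["answer"])
    # The retrieved segments are handy for checking what grounded the answer
    for doc in result["context"]:
        print(doc.metadata["source"], doc.page_content[:80])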