"""Complaints BOT: a Streamlit RAG app.

Ingests uploaded .txt files into a persisted Chroma vector store and answers
questions over them with a RetrievalQA chain backed by OpenAI or Cohere.
"""

import os
import tempfile

import streamlit as st
from langchain.chains import RetrievalQA
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.embeddings import CohereEmbeddings, OpenAIEmbeddings
from langchain.llms import Cohere, OpenAI
from langchain.prompts import PromptTemplate  # fixed: was `langchain.prompt` (no such module)
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma


def ingest(file_path, embeddings, vectordb, persist_directory):
    """Load a text file, chunk it, embed the chunks, and persist them to Chroma.

    Args:
        file_path: path to a text file that exists on disk.
        embeddings: embedding model (OpenAIEmbeddings or CohereEmbeddings).
        vectordb: unused; kept only so existing callers' signatures still work.
        persist_directory: directory the Chroma index is written to.

    Returns:
        The freshly built Chroma vector store (also persisted to disk).
    """
    documents = TextLoader(file_path).load()
    # Splitting the text and creating chunks (default overlap applies).
    text_splitter = CharacterTextSplitter(chunk_size=1000)
    docs = text_splitter.split_documents(documents)
    vectordb = Chroma.from_documents(
        documents=docs, embedding=embeddings, persist_directory=persist_directory
    )
    # Persist the db to disk so later sessions can reload it.
    vectordb.persist()
    return vectordb


def retrieve(query, llm=None, retriever=None):
    """Answer *query* with a "stuff" RetrievalQA chain over *retriever*.

    Args:
        query: the user's question.
        llm: language model used to generate the answer. Required at runtime;
            the None default exists only for backward signature compatibility.
            (The original read `llm`/`retriever` as globals that were never
            defined at module scope, so every call raised NameError.)
        retriever: vector-store retriever that supplies context documents.

    Returns:
        The chain's answer string.
    """
    template = """ Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.Use only the document for your answer and you may summarize the answer in 50 words to make it look better. 
{context} Question: {question} """
    # create the chain to answer questions
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        chain_type_kwargs={
            "prompt": PromptTemplate(
                template=template,
                input_variables=["context", "question"],
            )
        },
    )
    return qa_chain.run(query)


def main():
    """Streamlit entry point: collect credentials, build/reload the vector
    store, ingest uploaded files, and answer user queries."""
    # Main title of the application
    st.title("Complaints BOT")
    persist_directory = "myVectorDB"

    with st.sidebar:
        with st.form('Cohere/OpenAI'):
            mod = st.radio('Choose OpenAI/Cohere', ('OpenAI', 'Cohere'))
            api_key = st.text_input('Enter API key', type="password")
            submitted = st.form_submit_button("Submit")

    # Guard clause: nothing below can run without an API key.
    if not api_key:
        st.info("Please add configuration details in left panel")
        st.stop()

    if mod == 'OpenAI':
        os.environ["OPENAI_API_KEY"] = api_key
        llm = OpenAI(temperature=0.7, verbose=True)
        embeddings = OpenAIEmbeddings()
    else:  # Cohere
        os.environ["COHERE_API_KEY"] = api_key
        llm = Cohere(temperature=0.7, verbose=True)
        embeddings = CohereEmbeddings()

    # Now we can load the persisted database from disk, and use it as normal.
    vectordb = Chroma(
        persist_directory=persist_directory, embedding_function=embeddings
    )
    # create a retriever
    retriever = vectordb.as_retriever(search_kwargs={"k": 3})

    uploaded_file = st.file_uploader("Upload a file to ingest", type=["txt"])
    if uploaded_file is not None:
        # Streamlit keeps uploads in memory; TextLoader needs a real path.
        # Spill the bytes to a temp file first (the original passed
        # uploaded_file.name, which generally does not exist in the cwd).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as tmp:
            tmp.write(uploaded_file.getbuffer())
            tmp_path = tmp.name
        try:
            ingest(tmp_path, embeddings, vectordb, persist_directory)
        finally:
            os.remove(tmp_path)

    query = st.text_input("Query: ", "", key="input")
    result_display = st.empty()
    # text_input returns "" (never None) when empty — only query on real input.
    if query:
        result = retrieve(query, llm=llm, retriever=retriever)
        # Text area for editing the generated response
        result_display.text_area("Result:", value=result, height=500)


if __name__ == "__main__":
    main()