"""Answer a question about an uploaded file by dispatching on its extension.

PDF, e-mail (.eml) and Word (.docx) files are loaded with the matching
langchain document loader and queried through an OpenAI-backed
question-answering chain. CSV and Excel files are delegated to the
``csv_file`` / ``excel_file`` helpers imported from ``pandas_ai``.

The OpenAI API key is read from the ``OPENAI_API_KEY`` environment variable.
It must never be written into this file.
"""

import os

from langchain.chains import AnalyzeDocumentChain
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import Docx2txtLoader
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import UnstructuredEmailLoader
from langchain.llms import OpenAI
from pandas_ai import csv_file, excel_file

# SECURITY: an earlier revision of this module assigned a literal OpenAI
# secret key to os.environ inside every function. That key must be treated
# as compromised and revoked; the key is now supplied only through the
# process environment.
_API_KEY_ENV_VAR = "OPENAI_API_KEY"

_UNSUPPORTED_FORMAT_MESSAGE = "Please upload correct file format"


def _require_api_key():
    """Raise ``RuntimeError`` when the OpenAI API key is not configured."""
    if not os.environ.get(_API_KEY_ENV_VAR):
        raise RuntimeError(
            f"{_API_KEY_ENV_VAR} is not set; export it in the environment "
            "before asking questions about a document."
        )


def _join_page_content(documents):
    """Concatenate the ``page_content`` of each loaded document, in order."""
    return "".join(document.page_content for document in documents)


def _answer_question(document_text, message):
    """Run the question-answering chain over ``document_text``.

    Builds an ``OpenAI`` LLM with ``temperature=0``, wraps a ``map_reduce``
    QA chain in an ``AnalyzeDocumentChain`` and returns the result of its
    ``run`` call for the question ``message``.

    Raises:
        RuntimeError: if ``OPENAI_API_KEY`` is missing from the environment.
    """
    _require_api_key()
    llm = OpenAI(temperature=0)
    qa_chain = load_qa_chain(llm, chain_type="map_reduce")
    document_chain = AnalyzeDocumentChain(combine_docs_chain=qa_chain)
    return document_chain.run(input_document=document_text, question=message)


def pdf_file(file_upload, message):
    """Answer ``message`` using the text of the PDF at ``file_upload``.

    Args:
        file_upload: Passed unchanged to ``PyPDFLoader``.
            NOTE(review): presumably a filesystem path - confirm with caller.
        message: The question to ask about the document.

    Returns:
        The output of the question-answering chain.

    Raises:
        RuntimeError: if ``OPENAI_API_KEY`` is missing from the environment.
    """
    pages = PyPDFLoader(file_upload).load_and_split()
    return _answer_question(_join_page_content(pages), message)


def email_file(file_upload, message):
    """Answer ``message`` using the text of the e-mail at ``file_upload``.

    The message is loaded with ``mode="elements"`` and
    ``process_attachments=True``.

    Args:
        file_upload: Passed unchanged to ``UnstructuredEmailLoader``.
            NOTE(review): presumably a filesystem path - confirm with caller.
        message: The question to ask about the e-mail.

    Returns:
        The output of the question-answering chain.

    Raises:
        RuntimeError: if ``OPENAI_API_KEY`` is missing from the environment.
    """
    loader = UnstructuredEmailLoader(
        file_upload,
        mode="elements",
        process_attachments=True,
    )
    return _answer_question(_join_page_content(loader.load()), message)


def docx_file(file_upload, message):
    """Answer ``message`` using the text of the .docx at ``file_upload``.

    Args:
        file_upload: Passed unchanged to ``Docx2txtLoader``.
            NOTE(review): presumably a filesystem path - confirm with caller.
        message: The question to ask about the document.

    Returns:
        The output of the question-answering chain.

    Raises:
        RuntimeError: if ``OPENAI_API_KEY`` is missing from the environment.
    """
    documents = Docx2txtLoader(file_upload).load()
    return _answer_question(_join_page_content(documents), message)


def initiate_process(file_upload, extention, message):
    """Route an uploaded file to the handler for its extension.

    Args:
        file_upload: The uploaded file, forwarded unchanged to the handler.
        extention: File extension selecting the handler: ``pdf``, ``eml``,
            ``docx``, ``csv`` or ``xlsx``. Matching ignores case and a
            leading dot, so ``".PDF"`` is treated like ``"pdf"``.
        message: The question to ask about the file.

    Returns:
        The selected handler's result, or the string
        ``"Please upload correct file format"`` when the extension is not
        supported (including when it is not a string).
    """
    # Built per call so the lookup always sees the current module-level
    # handler functions.
    handlers = {
        "pdf": pdf_file,
        "eml": email_file,
        "docx": docx_file,
        "csv": csv_file,
        "xlsx": excel_file,
    }

    if not isinstance(extention, str):
        return _UNSUPPORTED_FORMAT_MESSAGE

    handler = handlers.get(extention.lower().lstrip("."))
    if handler is None:
        return _UNSUPPORTED_FORMAT_MESSAGE
    return handler(file_upload, message)