ssumukh17 committed on
Commit
88fb925
1 Parent(s): 5430b34

Upload function.py

Browse files
Files changed (1) hide show
  1. function.py +67 -0
function.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.llms import OpenAI
2
+ from langchain.chains import AnalyzeDocumentChain
3
+ from langchain.document_loaders import PyPDFLoader
4
+ from langchain.document_loaders import UnstructuredEmailLoader
5
+ from langchain.document_loaders import Docx2txtLoader
6
+ from langchain.chains.question_answering import load_qa_chain
7
+ import os
8
+ from pandas_ai import csv_file,excel_file
9
+
10
+
11
def pdf_file(file_upload, message):
    """Answer a question about the contents of a PDF file.

    The PDF is loaded with ``PyPDFLoader``, the text of every returned
    document is concatenated, and the combined text is passed to a
    ``map_reduce`` question-answering chain wrapped in
    ``AnalyzeDocumentChain``.

    Args:
        file_upload: Location of the PDF, handed unchanged to
            ``PyPDFLoader`` (presumably a filesystem path -- confirm
            against the caller).
        message: The question to ask about the document.

    Returns:
        Whatever ``AnalyzeDocumentChain.run`` returns for the question.

    Note:
        The OpenAI API key is deliberately NOT set here.  A secret must
        never be hard-coded in source; ``OpenAI()`` is expected to pick up
        ``OPENAI_API_KEY`` from the process environment, so that variable
        must be configured by the deployment before this is called.
    """
    loader = PyPDFLoader(file_upload)
    pages = loader.load_and_split()
    # Join once instead of growing a string inside a loop.
    page_text = "".join(page.page_content for page in pages)

    llm = OpenAI(temperature=0)
    qa_chain = load_qa_chain(llm, chain_type="map_reduce")
    qa_pdf_chain = AnalyzeDocumentChain(combine_docs_chain=qa_chain)
    return qa_pdf_chain.run(input_document=page_text, question=message)
23
+
24
def email_file(file_upload, message):
    """Answer a question about the contents of an email file.

    The email is loaded with ``UnstructuredEmailLoader`` using
    ``mode="elements"`` and ``process_attachments=True``, the text of every
    returned element is concatenated, and the combined text is passed to a
    ``map_reduce`` question-answering chain wrapped in
    ``AnalyzeDocumentChain``.

    Args:
        file_upload: Location of the email file, handed unchanged to
            ``UnstructuredEmailLoader`` (presumably a filesystem path --
            confirm against the caller).
        message: The question to ask about the email.

    Returns:
        Whatever ``AnalyzeDocumentChain.run`` returns for the question.

    Note:
        The OpenAI API key is deliberately NOT set here.  A secret must
        never be hard-coded in source; ``OpenAI()`` is expected to pick up
        ``OPENAI_API_KEY`` from the process environment, so that variable
        must be configured by the deployment before this is called.
    """
    loader = UnstructuredEmailLoader(
        file_upload,
        mode="elements",
        process_attachments=True,
    )
    elements = loader.load()
    # Join once instead of growing a string inside a loop.
    email_text = "".join(element.page_content for element in elements)

    llm = OpenAI(temperature=0)
    qa_chain = load_qa_chain(llm, chain_type="map_reduce")
    qa_email_chain = AnalyzeDocumentChain(combine_docs_chain=qa_chain)
    return qa_email_chain.run(input_document=email_text, question=message)
36
+
37
def docx_file(file_upload, message):
    """Answer a question about the contents of a .docx file.

    The document is loaded with ``Docx2txtLoader``, the text of every
    returned document is concatenated, and the combined text is passed to a
    ``map_reduce`` question-answering chain wrapped in
    ``AnalyzeDocumentChain``.

    Args:
        file_upload: Location of the .docx file, handed unchanged to
            ``Docx2txtLoader`` (presumably a filesystem path -- confirm
            against the caller).
        message: The question to ask about the document.

    Returns:
        Whatever ``AnalyzeDocumentChain.run`` returns for the question.

    Note:
        The OpenAI API key is deliberately NOT set here.  A secret must
        never be hard-coded in source; ``OpenAI()`` is expected to pick up
        ``OPENAI_API_KEY`` from the process environment, so that variable
        must be configured by the deployment before this is called.
    """
    loader = Docx2txtLoader(file_upload)
    documents = loader.load()
    # Join once instead of growing a string inside a loop.
    doc_text = "".join(doc.page_content for doc in documents)

    llm = OpenAI(temperature=0)
    qa_chain = load_qa_chain(llm, chain_type="map_reduce")
    qa_document_chain = AnalyzeDocumentChain(combine_docs_chain=qa_chain)
    return qa_document_chain.run(input_document=doc_text, question=message)
49
+
50
def initiate_process(file_upload, extention, message):
    """Route an uploaded file to the handler for its extension.

    Supported extensions are ``pdf``, ``eml``, ``docx``, ``csv`` and
    ``xlsx``.  Matching ignores case, surrounding whitespace and a leading
    dot, so ``"PDF"`` and ``".pdf"`` are treated the same as ``"pdf"``.

    Args:
        file_upload: The uploaded file, passed unchanged to the handler.
        extention: The file's extension.  (The spelling is kept because it
            is part of the function's keyword interface.)
        message: The question to ask about the file, passed unchanged to
            the handler.

    Returns:
        The selected handler's result, or the string
        ``"Please upload correct file format"`` when the extension is not
        supported (including when it is not a string at all).
    """
    # Non-string values (e.g. None) cannot name a format; send them to the
    # fallback message instead of failing on a string method.
    if isinstance(extention, str):
        kind = extention.strip().lower().lstrip(".")
    else:
        kind = ""

    # An explicit chain keeps each handler name resolved only in the branch
    # that is actually taken.
    if kind == "pdf":
        return pdf_file(file_upload, message)
    if kind == "eml":
        return email_file(file_upload, message)
    if kind == "docx":
        return docx_file(file_upload, message)
    if kind == "csv":
        return csv_file(file_upload, message)
    if kind == "xlsx":
        return excel_file(file_upload, message)
    return "Please upload correct file format"