Spaces:

oliverwang15
/

DAN_AI

Sleeping

App Files Community

oliverwang15 committed on Nov 4, 2023

Commit

acc114a

1 Parent(s): 4831f4c

Update: add support for the .pdf format

Browse files

Files changed (4) hide show

app.py +1 -1
backend.py +19 -3
openai.py +0 -1
requirements.txt +3 -1

app.py CHANGED Viewed

@@ -31,7 +31,7 @@ with gr.Blocks(theme="dark") as demo:
                         label='Enter your OpenAI API key here',
                         type='password')
-                file = gr.File(label='Upload your .txt file here', file_types=['.txt'], file_count = 'multiple')
                 questions = gr.CheckboxGroup(choices = QUESTIONS, value = QUESTIONS, label="Questions", info="Please select the question you want to ask")

                         label='Enter your OpenAI API key here',
                         type='password')
+                file = gr.File(label='Upload your .txt or .pdf file here', file_types=['.txt', '.pdf'], file_count = 'multiple')
                 questions = gr.CheckboxGroup(choices = QUESTIONS, value = QUESTIONS, label="Questions", info="Please select the question you want to ask")

backend.py CHANGED Viewed

@@ -21,15 +21,28 @@ class Backend:
             raise gr.Error("You need to upload a file first")
         return text
     def read_file(self, files):
         # read the file
         text_list = []
         self.filename_list = []
         if files is not None:
             for file in files:
-                with open(file.name, 'r', encoding='utf-8') as f:
-                    text_list.append(f.read())
-                    self.filename_list.append(file.name.split('\\')[-1])
         else:
             raise gr.Error("You need to upload a file first")
         return text_list
@@ -205,11 +218,13 @@ class Backend:
             del self.clicked_correct_reference
         self.current_passage += 1
         if self.current_passage >= self.total_passages:
             # self.current_passage -= 1
             return "No more passages!", "No more passages!", "No more passages!", "No more passages!", 'No more passages!', 'No more passages!', 'Still need to click the button above to save the results', None, None
         else:
             gpt_res = self.res_list[self.current_passage]
             self.gpt_result = gpt_res
             res = self.gpt_result[f'Question {self.current_question + 1}']
@@ -236,6 +251,7 @@ class Backend:
             # self.current_passage += 1
             return "No more passages!", "No more passages!", "No more passages!", "No more passages!", 'No more passages!', 'No more passages!', 'Still need to click the button above to save the results', None, None
         else:
             gpt_res = self.res_list[self.current_passage]
             self.gpt_result = gpt_res
             res = self.gpt_result[f'Question {self.current_question + 1}']

             raise gr.Error("You need to upload a file first")
         return text
+    def phrase_pdf(self, file_path):
+        from langchain.document_loaders import UnstructuredPDFLoader
+        loader = UnstructuredPDFLoader(file_path, model = 'elements')
+        file = loader.load()
+        return file[0].page_content
     def read_file(self, files):
         # read the file
         text_list = []
         self.filename_list = []
         if files is not None:
             for file in files:
+                if file.name.split('.')[-1] == 'pdf':
+                    # convert pdf to txt
+                    text = self.phrase_pdf(file.name)
+                else:
+                    with open(file.name, 'r', encoding='utf-8') as f:
+                        text = f.read()
+                text_list.append(text)
+                self.filename_list.append(file.name.split('\\')[-1])
         else:
             raise gr.Error("You need to upload a file first")
         return text_list
             del self.clicked_correct_reference
         self.current_passage += 1
         if self.current_passage >= self.total_passages:
             # self.current_passage -= 1
             return "No more passages!", "No more passages!", "No more passages!", "No more passages!", 'No more passages!', 'No more passages!', 'Still need to click the button above to save the results', None, None
         else:
+            self.text = self.text_list[self.current_passage]
             gpt_res = self.res_list[self.current_passage]
             self.gpt_result = gpt_res
             res = self.gpt_result[f'Question {self.current_question + 1}']
             # self.current_passage += 1
             return "No more passages!", "No more passages!", "No more passages!", "No more passages!", 'No more passages!', 'No more passages!', 'Still need to click the button above to save the results', None, None
         else:
+            self.text = self.text_list[self.current_passage]
             gpt_res = self.res_list[self.current_passage]
             self.gpt_result = gpt_res
             res = self.gpt_result[f'Question {self.current_question + 1}']

openai.py CHANGED Viewed

@@ -32,7 +32,6 @@ class OpenAI:
         }, headers={
             'Authorization': f"Bearer {api_key}"
         })
-        print(resp)
         self.history.append(resp.json()['choices'][0]['message'])
         res = resp.json()['choices'][0]['message']['content']

         }, headers={
             'Authorization': f"Bearer {api_key}"
         })
         self.history.append(resp.json()['choices'][0]['message'])
         res = resp.json()['choices'][0]['message']['content']

requirements.txt CHANGED Viewed

@@ -1,2 +1,4 @@
 fuzzywuzzy
-openpyxl

 fuzzywuzzy
+openpyxl
+"unstructured[all-docs]"
+langchain