Spaces:
Sleeping
Sleeping
Commit
·
acc114a
1
Parent(s):
4831f4c
update on the .pdf format support
Browse files
- app.py +1 -1
- backend.py +19 -3
- openai.py +0 -1
- requirements.txt +3 -1
app.py
CHANGED
@@ -31,7 +31,7 @@ with gr.Blocks(theme="dark") as demo:
|
|
31 |
label='Enter your OpenAI API key here',
|
32 |
type='password')
|
33 |
|
34 |
-
file = gr.File(label='Upload your .txt file here', file_types=['.txt'], file_count = 'multiple')
|
35 |
|
36 |
questions = gr.CheckboxGroup(choices = QUESTIONS, value = QUESTIONS, label="Questions", info="Please select the question you want to ask")
|
37 |
|
|
|
31 |
label='Enter your OpenAI API key here',
|
32 |
type='password')
|
33 |
|
34 |
+
file = gr.File(label='Upload your .txt or .pdf file here', file_types=['.txt', '.pdf'], file_count = 'multiple')
|
35 |
|
36 |
questions = gr.CheckboxGroup(choices = QUESTIONS, value = QUESTIONS, label="Questions", info="Please select the question you want to ask")
|
37 |
|
backend.py
CHANGED
@@ -21,15 +21,28 @@ class Backend:
|
|
21 |
raise gr.Error("You need to upload a file first")
|
22 |
return text
|
23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
def read_file(self, files):
|
25 |
# read the file
|
26 |
text_list = []
|
27 |
self.filename_list = []
|
28 |
if files is not None:
|
29 |
for file in files:
|
30 |
-
|
31 |
-
|
32 |
-
self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
else:
|
34 |
raise gr.Error("You need to upload a file first")
|
35 |
return text_list
|
@@ -205,11 +218,13 @@ class Backend:
|
|
205 |
del self.clicked_correct_reference
|
206 |
|
207 |
self.current_passage += 1
|
|
|
208 |
|
209 |
if self.current_passage >= self.total_passages:
|
210 |
# self.current_passage -= 1
|
211 |
return "No more passages!", "No more passages!", "No more passages!", "No more passages!", 'No more passages!', 'No more passages!', 'Still need to click the button above to save the results', None, None
|
212 |
else:
|
|
|
213 |
gpt_res = self.res_list[self.current_passage]
|
214 |
self.gpt_result = gpt_res
|
215 |
res = self.gpt_result[f'Question {self.current_question + 1}']
|
@@ -236,6 +251,7 @@ class Backend:
|
|
236 |
# self.current_passage += 1
|
237 |
return "No more passages!", "No more passages!", "No more passages!", "No more passages!", 'No more passages!', 'No more passages!', 'Still need to click the button above to save the results', None, None
|
238 |
else:
|
|
|
239 |
gpt_res = self.res_list[self.current_passage]
|
240 |
self.gpt_result = gpt_res
|
241 |
res = self.gpt_result[f'Question {self.current_question + 1}']
|
|
|
21 |
raise gr.Error("You need to upload a file first")
|
22 |
return text
|
23 |
|
24 |
+
def phrase_pdf(self, file_path):
    """Extract the text content of a PDF file.

    The method name is kept as-is because ``read_file`` calls it by this
    name.

    Args:
        file_path: Path of the PDF file on disk.

    Returns:
        The text of the PDF as one string. If the loader yields several
        documents they are joined with blank lines; if it yields none,
        an empty string is returned instead of raising ``IndexError``.
    """
    # Imported lazily so the PDF dependencies are only loaded when a PDF
    # is actually processed.
    from langchain.document_loaders import UnstructuredPDFLoader

    # The loader option is spelled ``mode``; the previous ``model='elements'``
    # keyword was not a loader option, so the default single-document mode
    # was what actually ran. Make that explicit.
    loader = UnstructuredPDFLoader(file_path, mode='single')
    documents = loader.load()
    return '\n\n'.join(doc.page_content for doc in documents)
|
29 |
+
|
30 |
def read_file(self, files):
    """Read every uploaded file and return its text.

    PDF files (extension matched case-insensitively) are converted with
    ``self.phrase_pdf``; every other file is read as UTF-8 text.

    Args:
        files: Iterable of uploaded-file objects exposing a ``name``
            attribute that holds the file's path, or ``None`` when
            nothing has been uploaded.

    Returns:
        A list with one text string per file, in upload order.
        ``self.filename_list`` is reset and filled with the matching
        base file names.

    Raises:
        gr.Error: If ``files`` is ``None``.
    """
    # ntpath.basename splits on both '\\' and '/', so the display name is
    # the bare file name on Windows and on POSIX hosts alike. Splitting on
    # '\\' only left the full path in place on POSIX.
    import ntpath

    text_list = []
    self.filename_list = []
    if files is None:
        raise gr.Error("You need to upload a file first")

    for file in files:
        path = file.name
        extension = path.rsplit('.', 1)[-1].lower()
        if extension == 'pdf':
            # Convert the PDF to plain text.
            text = self.phrase_pdf(path)
        else:
            with open(path, 'r', encoding='utf-8') as f:
                text = f.read()

        text_list.append(text)
        self.filename_list.append(ntpath.basename(path))
    return text_list
|
|
|
218 |
del self.clicked_correct_reference
|
219 |
|
220 |
self.current_passage += 1
|
221 |
+
|
222 |
|
223 |
if self.current_passage >= self.total_passages:
|
224 |
# self.current_passage -= 1
|
225 |
return "No more passages!", "No more passages!", "No more passages!", "No more passages!", 'No more passages!', 'No more passages!', 'Still need to click the button above to save the results', None, None
|
226 |
else:
|
227 |
+
self.text = self.text_list[self.current_passage]
|
228 |
gpt_res = self.res_list[self.current_passage]
|
229 |
self.gpt_result = gpt_res
|
230 |
res = self.gpt_result[f'Question {self.current_question + 1}']
|
|
|
251 |
# self.current_passage += 1
|
252 |
return "No more passages!", "No more passages!", "No more passages!", "No more passages!", 'No more passages!', 'No more passages!', 'Still need to click the button above to save the results', None, None
|
253 |
else:
|
254 |
+
self.text = self.text_list[self.current_passage]
|
255 |
gpt_res = self.res_list[self.current_passage]
|
256 |
self.gpt_result = gpt_res
|
257 |
res = self.gpt_result[f'Question {self.current_question + 1}']
|
openai.py
CHANGED
@@ -32,7 +32,6 @@ class OpenAI:
|
|
32 |
}, headers={
|
33 |
'Authorization': f"Bearer {api_key}"
|
34 |
})
|
35 |
-
print(resp)
|
36 |
self.history.append(resp.json()['choices'][0]['message'])
|
37 |
res = resp.json()['choices'][0]['message']['content']
|
38 |
|
|
|
32 |
}, headers={
|
33 |
'Authorization': f"Bearer {api_key}"
|
34 |
})
|
|
|
35 |
self.history.append(resp.json()['choices'][0]['message'])
|
36 |
res = resp.json()['choices'][0]['message']['content']
|
37 |
|
requirements.txt
CHANGED
@@ -1,2 +1,4 @@
|
|
1 |
fuzzywuzzy
|
2 |
-
openpyxl
|
|
|
|
|
|
1 |
fuzzywuzzy
|
2 |
+
openpyxl
|
3 |
+
unstructured[all-docs]
|
4 |
+
langchain
|