Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -15,7 +15,7 @@ import sys
|
|
15 |
import time
|
16 |
import re
|
17 |
import textract
|
18 |
-
import zipfile
|
19 |
import random
|
20 |
|
21 |
from datetime import datetime
|
@@ -33,6 +33,8 @@ from langchain.chat_models import ChatOpenAI
|
|
33 |
from langchain.memory import ConversationBufferMemory
|
34 |
from langchain.chains import ConversationalRetrievalChain
|
35 |
from templates import css, bot_template, user_template
|
|
|
|
|
36 |
|
37 |
# page config and sidebar declares up front allow all other functions to see global class variables
|
38 |
st.set_page_config(page_title="GPT Streamlit Document Reasoner", layout="wide")
|
@@ -163,7 +165,6 @@ def add_paper_buttons_and_links():
|
|
163 |
add_paper_buttons_and_links()
|
164 |
|
165 |
|
166 |
-
|
167 |
# Process user input is a post processor algorithm which runs after document embedding vector DB play of GPT on context of documents..
|
168 |
def process_user_input(user_question):
|
169 |
# Check and initialize 'conversation' in session state if not present
|
@@ -205,32 +206,6 @@ def extract_feature_and_detail(paragraph):
|
|
205 |
return header, detail
|
206 |
return None, None
|
207 |
|
208 |
-
|
209 |
-
def process_user_input_old(user_question):
|
210 |
-
response = st.session_state.conversation({'question': user_question})
|
211 |
-
st.session_state.chat_history = response['chat_history']
|
212 |
-
for i, message in enumerate(st.session_state.chat_history):
|
213 |
-
template = user_template if i % 2 == 0 else bot_template
|
214 |
-
st.write(template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
|
215 |
-
# Save file output from PDF query results
|
216 |
-
filename = generate_filename(user_question, 'txt')
|
217 |
-
#create_file(filename, user_question, message.content)
|
218 |
-
response = message.content
|
219 |
-
user_prompt = user_question
|
220 |
-
create_file(filename, user_prompt, response, should_save)
|
221 |
-
#st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
def generate_filename_old(prompt, file_type):
|
229 |
-
central = pytz.timezone('US/Central')
|
230 |
-
safe_date_time = datetime.now(central).strftime("%m%d_%H%M") # Date and time DD-HHMM
|
231 |
-
safe_prompt = "".join(x for x in prompt if x.isalnum())[:90] # Limit file name size and trim whitespace
|
232 |
-
return f"{safe_date_time}_{safe_prompt}.{file_type}" # Return a safe file name
|
233 |
-
|
234 |
def generate_filename(prompt, file_type):
|
235 |
central = pytz.timezone('US/Central')
|
236 |
safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
|
@@ -333,71 +308,6 @@ def create_file(filename, prompt, response, should_save=True):
|
|
333 |
file.write(combined_content)
|
334 |
|
335 |
|
336 |
-
def create_file_old2(filename, prompt, response, should_save=True):
|
337 |
-
if not should_save:
|
338 |
-
return
|
339 |
-
|
340 |
-
# Step 2: Extract base filename without extension
|
341 |
-
base_filename, ext = os.path.splitext(filename)
|
342 |
-
|
343 |
-
# Step 3: Check if the response contains Python code
|
344 |
-
has_python_code = bool(re.search(r"```python([\s\S]*?)```", response))
|
345 |
-
|
346 |
-
# Step 4: Initialize the combined content
|
347 |
-
combined_content = ""
|
348 |
-
|
349 |
-
# Add Prompt with markdown title and emoji
|
350 |
-
combined_content += "# Prompt 📝\n" + prompt + "\n\n"
|
351 |
-
|
352 |
-
# Add Response with markdown title and emoji
|
353 |
-
combined_content += "# Response 💬\n" + response + "\n\n"
|
354 |
-
|
355 |
-
# Check for Python code or other resources and add them with markdown title and emoji
|
356 |
-
resources = re.findall(r"```([\s\S]*?)```", response)
|
357 |
-
for resource in resources:
|
358 |
-
# Check if the resource contains Python code
|
359 |
-
if "python" in resource.lower():
|
360 |
-
st.markdown('# Running python.. ')
|
361 |
-
# Remove the word 'python' from the beginning of the code block
|
362 |
-
cleaned_code = re.sub(r'^\s*python', '', resource, flags=re.IGNORECASE | re.MULTILINE)
|
363 |
-
|
364 |
-
# Add Code Results title with markdown and emoji
|
365 |
-
combined_content += "# Code Results 🚀\n"
|
366 |
-
|
367 |
-
# Capture standard output
|
368 |
-
original_stdout = sys.stdout
|
369 |
-
sys.stdout = io.StringIO()
|
370 |
-
|
371 |
-
# Execute cleaned Python code and capture the output
|
372 |
-
try:
|
373 |
-
st.markdown('# Running exec.. ')
|
374 |
-
|
375 |
-
exec(cleaned_code)
|
376 |
-
code_output = sys.stdout.getvalue()
|
377 |
-
combined_content += f"```\n{code_output}\n```\n\n"
|
378 |
-
realtimeEvalResponse = "# Code Results 🚀\n" + "```" + code_output + "```\n\n"
|
379 |
-
|
380 |
-
st.write(realtimeEvalResponse)
|
381 |
-
|
382 |
-
st.markdown('# Completed exec.. ')
|
383 |
-
|
384 |
-
|
385 |
-
except Exception as e:
|
386 |
-
combined_content += f"```python\nError executing Python code: {e}\n```\n\n"
|
387 |
-
st.markdown('# Error in exec.. ' + combined_content)
|
388 |
-
|
389 |
-
# Restore the original standard output
|
390 |
-
sys.stdout = original_stdout
|
391 |
-
else:
|
392 |
-
# Add Resource title with markdown and emoji for non-Python resources
|
393 |
-
combined_content += "# Resource 🛠️\n" + "```" + resource + "```\n\n"
|
394 |
-
|
395 |
-
# Write the combined content into one file
|
396 |
-
with open(f"{base_filename}-Combined.md", 'w') as file:
|
397 |
-
file.write(combined_content)
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
def truncate_document(document, length):
|
402 |
return document[:length]
|
403 |
|
@@ -474,8 +384,7 @@ def extract_mime_type(file):
|
|
474 |
else:
|
475 |
raise TypeError("Input should be a string or a streamlit.UploadedFile object")
|
476 |
|
477 |
-
|
478 |
-
import re
|
479 |
|
480 |
def extract_file_extension(file):
|
481 |
# get the file name directly from the UploadedFile object
|
@@ -505,20 +414,6 @@ def pdf2txt(docs):
|
|
505 |
text += pdf.pages[page].extract_text() # new PyPDF2 syntax
|
506 |
except Exception as e:
|
507 |
st.write(f"Error processing file {file.name}: {e}")
|
508 |
-
|
509 |
-
return text
|
510 |
-
|
511 |
-
def pdf2txt_old(pdf_docs):
|
512 |
-
st.write(pdf_docs)
|
513 |
-
for file in pdf_docs:
|
514 |
-
mime_type = extract_mime_type(file)
|
515 |
-
st.write(f"MIME type of file: {mime_type}")
|
516 |
-
|
517 |
-
text = ""
|
518 |
-
for pdf in pdf_docs:
|
519 |
-
pdf_reader = PdfReader(pdf)
|
520 |
-
for page in pdf_reader.pages:
|
521 |
-
text += page.extract_text()
|
522 |
return text
|
523 |
|
524 |
def txt2chunks(text):
|
@@ -636,8 +531,6 @@ def main():
|
|
636 |
if st.button('💬 Chat'):
|
637 |
st.write('Reasoning with your inputs...')
|
638 |
|
639 |
-
#response = chat_with_model(user_prompt, ''.join(list(document_sections,)), model_choice) # *************************************
|
640 |
-
|
641 |
# Divide the user_prompt into smaller sections
|
642 |
user_prompt_sections = divide_prompt(user_prompt, max_length)
|
643 |
full_response = ''
|
@@ -714,7 +607,6 @@ def main():
|
|
714 |
create_file(filename, user_prompt, response, should_save)
|
715 |
|
716 |
st.experimental_rerun()
|
717 |
-
#st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
|
718 |
|
719 |
if __name__ == "__main__":
|
720 |
main()
|
@@ -740,4 +632,3 @@ with st.sidebar:
|
|
740 |
st.markdown('# AI Search Index of Length:' + length + ' Created.') # add timing
|
741 |
filename = generate_filename(raw, 'txt')
|
742 |
create_file(filename, raw, '', should_save)
|
743 |
-
#create_file(filename, raw, '')
|
|
|
15 |
import time
|
16 |
import re
|
17 |
import textract
|
18 |
+
import zipfile
|
19 |
import random
|
20 |
|
21 |
from datetime import datetime
|
|
|
33 |
from langchain.memory import ConversationBufferMemory
|
34 |
from langchain.chains import ConversationalRetrievalChain
|
35 |
from templates import css, bot_template, user_template
|
36 |
+
from io import BytesIO
|
37 |
+
|
38 |
|
39 |
# page config and sidebar declares up front allow all other functions to see global class variables
|
40 |
st.set_page_config(page_title="GPT Streamlit Document Reasoner", layout="wide")
|
|
|
165 |
add_paper_buttons_and_links()
|
166 |
|
167 |
|
|
|
168 |
# Process user input is a post processor algorithm which runs after document embedding vector DB play of GPT on context of documents..
|
169 |
def process_user_input(user_question):
|
170 |
# Check and initialize 'conversation' in session state if not present
|
|
|
206 |
return header, detail
|
207 |
return None, None
|
208 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
209 |
def generate_filename(prompt, file_type):
|
210 |
central = pytz.timezone('US/Central')
|
211 |
safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
|
|
|
308 |
file.write(combined_content)
|
309 |
|
310 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
311 |
def truncate_document(document, length):
|
312 |
return document[:length]
|
313 |
|
|
|
384 |
else:
|
385 |
raise TypeError("Input should be a string or a streamlit.UploadedFile object")
|
386 |
|
387 |
+
|
|
|
388 |
|
389 |
def extract_file_extension(file):
|
390 |
# get the file name directly from the UploadedFile object
|
|
|
414 |
text += pdf.pages[page].extract_text() # new PyPDF2 syntax
|
415 |
except Exception as e:
|
416 |
st.write(f"Error processing file {file.name}: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
417 |
return text
|
418 |
|
419 |
def txt2chunks(text):
|
|
|
531 |
if st.button('💬 Chat'):
|
532 |
st.write('Reasoning with your inputs...')
|
533 |
|
|
|
|
|
534 |
# Divide the user_prompt into smaller sections
|
535 |
user_prompt_sections = divide_prompt(user_prompt, max_length)
|
536 |
full_response = ''
|
|
|
607 |
create_file(filename, user_prompt, response, should_save)
|
608 |
|
609 |
st.experimental_rerun()
|
|
|
610 |
|
611 |
if __name__ == "__main__":
|
612 |
main()
|
|
|
632 |
st.markdown('# AI Search Index of Length:' + length + ' Created.') # add timing
|
633 |
filename = generate_filename(raw, 'txt')
|
634 |
create_file(filename, raw, '', should_save)
|
|