awacke1 committed on
Commit
a0feab3
·
1 Parent(s): 875b4b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -113
app.py CHANGED
@@ -15,7 +15,7 @@ import sys
15
  import time
16
  import re
17
  import textract
18
- import zipfile # New import for zipping files
19
  import random
20
 
21
  from datetime import datetime
@@ -33,6 +33,8 @@ from langchain.chat_models import ChatOpenAI
33
  from langchain.memory import ConversationBufferMemory
34
  from langchain.chains import ConversationalRetrievalChain
35
  from templates import css, bot_template, user_template
 
 
36
 
37
  # page config and sidebar declares up front allow all other functions to see global class variables
38
  st.set_page_config(page_title="GPT Streamlit Document Reasoner", layout="wide")
@@ -163,7 +165,6 @@ def add_paper_buttons_and_links():
163
  add_paper_buttons_and_links()
164
 
165
 
166
-
167
  # Process user input is a post processor algorithm which runs after document embedding vector DB play of GPT on context of documents..
168
  def process_user_input(user_question):
169
  # Check and initialize 'conversation' in session state if not present
@@ -205,32 +206,6 @@ def extract_feature_and_detail(paragraph):
205
  return header, detail
206
  return None, None
207
 
208
-
209
- def process_user_input_old(user_question):
210
- response = st.session_state.conversation({'question': user_question})
211
- st.session_state.chat_history = response['chat_history']
212
- for i, message in enumerate(st.session_state.chat_history):
213
- template = user_template if i % 2 == 0 else bot_template
214
- st.write(template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
215
- # Save file output from PDF query results
216
- filename = generate_filename(user_question, 'txt')
217
- #create_file(filename, user_question, message.content)
218
- response = message.content
219
- user_prompt = user_question
220
- create_file(filename, user_prompt, response, should_save)
221
- #st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
222
-
223
-
224
-
225
-
226
-
227
-
228
- def generate_filename_old(prompt, file_type):
229
- central = pytz.timezone('US/Central')
230
- safe_date_time = datetime.now(central).strftime("%m%d_%H%M") # Date and time DD-HHMM
231
- safe_prompt = "".join(x for x in prompt if x.isalnum())[:90] # Limit file name size and trim whitespace
232
- return f"{safe_date_time}_{safe_prompt}.{file_type}" # Return a safe file name
233
-
234
  def generate_filename(prompt, file_type):
235
  central = pytz.timezone('US/Central')
236
  safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
@@ -333,71 +308,6 @@ def create_file(filename, prompt, response, should_save=True):
333
  file.write(combined_content)
334
 
335
 
336
- def create_file_old2(filename, prompt, response, should_save=True):
337
- if not should_save:
338
- return
339
-
340
- # Step 2: Extract base filename without extension
341
- base_filename, ext = os.path.splitext(filename)
342
-
343
- # Step 3: Check if the response contains Python code
344
- has_python_code = bool(re.search(r"```python([\s\S]*?)```", response))
345
-
346
- # Step 4: Initialize the combined content
347
- combined_content = ""
348
-
349
- # Add Prompt with markdown title and emoji
350
- combined_content += "# Prompt 📝\n" + prompt + "\n\n"
351
-
352
- # Add Response with markdown title and emoji
353
- combined_content += "# Response 💬\n" + response + "\n\n"
354
-
355
- # Check for Python code or other resources and add them with markdown title and emoji
356
- resources = re.findall(r"```([\s\S]*?)```", response)
357
- for resource in resources:
358
- # Check if the resource contains Python code
359
- if "python" in resource.lower():
360
- st.markdown('# Running python.. ')
361
- # Remove the word 'python' from the beginning of the code block
362
- cleaned_code = re.sub(r'^\s*python', '', resource, flags=re.IGNORECASE | re.MULTILINE)
363
-
364
- # Add Code Results title with markdown and emoji
365
- combined_content += "# Code Results 🚀\n"
366
-
367
- # Capture standard output
368
- original_stdout = sys.stdout
369
- sys.stdout = io.StringIO()
370
-
371
- # Execute cleaned Python code and capture the output
372
- try:
373
- st.markdown('# Running exec.. ')
374
-
375
- exec(cleaned_code)
376
- code_output = sys.stdout.getvalue()
377
- combined_content += f"```\n{code_output}\n```\n\n"
378
- realtimeEvalResponse = "# Code Results 🚀\n" + "```" + code_output + "```\n\n"
379
-
380
- st.write(realtimeEvalResponse)
381
-
382
- st.markdown('# Completed exec.. ')
383
-
384
-
385
- except Exception as e:
386
- combined_content += f"```python\nError executing Python code: {e}\n```\n\n"
387
- st.markdown('# Error in exec.. ' + combined_content)
388
-
389
- # Restore the original standard output
390
- sys.stdout = original_stdout
391
- else:
392
- # Add Resource title with markdown and emoji for non-Python resources
393
- combined_content += "# Resource 🛠️\n" + "```" + resource + "```\n\n"
394
-
395
- # Write the combined content into one file
396
- with open(f"{base_filename}-Combined.md", 'w') as file:
397
- file.write(combined_content)
398
-
399
-
400
-
401
  def truncate_document(document, length):
402
  return document[:length]
403
 
@@ -474,8 +384,7 @@ def extract_mime_type(file):
474
  else:
475
  raise TypeError("Input should be a string or a streamlit.UploadedFile object")
476
 
477
- from io import BytesIO
478
- import re
479
 
480
  def extract_file_extension(file):
481
  # get the file name directly from the UploadedFile object
@@ -505,20 +414,6 @@ def pdf2txt(docs):
505
  text += pdf.pages[page].extract_text() # new PyPDF2 syntax
506
  except Exception as e:
507
  st.write(f"Error processing file {file.name}: {e}")
508
-
509
- return text
510
-
511
- def pdf2txt_old(pdf_docs):
512
- st.write(pdf_docs)
513
- for file in pdf_docs:
514
- mime_type = extract_mime_type(file)
515
- st.write(f"MIME type of file: {mime_type}")
516
-
517
- text = ""
518
- for pdf in pdf_docs:
519
- pdf_reader = PdfReader(pdf)
520
- for page in pdf_reader.pages:
521
- text += page.extract_text()
522
  return text
523
 
524
  def txt2chunks(text):
@@ -636,8 +531,6 @@ def main():
636
  if st.button('💬 Chat'):
637
  st.write('Reasoning with your inputs...')
638
 
639
- #response = chat_with_model(user_prompt, ''.join(list(document_sections,)), model_choice) # *************************************
640
-
641
  # Divide the user_prompt into smaller sections
642
  user_prompt_sections = divide_prompt(user_prompt, max_length)
643
  full_response = ''
@@ -714,7 +607,6 @@ def main():
714
  create_file(filename, user_prompt, response, should_save)
715
 
716
  st.experimental_rerun()
717
- #st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
718
 
719
  if __name__ == "__main__":
720
  main()
@@ -740,4 +632,3 @@ with st.sidebar:
740
  st.markdown('# AI Search Index of Length:' + length + ' Created.') # add timing
741
  filename = generate_filename(raw, 'txt')
742
  create_file(filename, raw, '', should_save)
743
- #create_file(filename, raw, '')
 
15
  import time
16
  import re
17
  import textract
18
+ import zipfile
19
  import random
20
 
21
  from datetime import datetime
 
33
  from langchain.memory import ConversationBufferMemory
34
  from langchain.chains import ConversationalRetrievalChain
35
  from templates import css, bot_template, user_template
36
+ from io import BytesIO
37
+
38
 
39
  # page config and sidebar declares up front allow all other functions to see global class variables
40
  st.set_page_config(page_title="GPT Streamlit Document Reasoner", layout="wide")
 
165
  add_paper_buttons_and_links()
166
 
167
 
 
168
  # Process user input is a post processor algorithm which runs after document embedding vector DB play of GPT on context of documents..
169
  def process_user_input(user_question):
170
  # Check and initialize 'conversation' in session state if not present
 
206
  return header, detail
207
  return None, None
208
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
  def generate_filename(prompt, file_type):
210
  central = pytz.timezone('US/Central')
211
  safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
 
308
  file.write(combined_content)
309
 
310
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
  def truncate_document(document, length):
312
  return document[:length]
313
 
 
384
  else:
385
  raise TypeError("Input should be a string or a streamlit.UploadedFile object")
386
 
387
+
 
388
 
389
  def extract_file_extension(file):
390
  # get the file name directly from the UploadedFile object
 
414
  text += pdf.pages[page].extract_text() # new PyPDF2 syntax
415
  except Exception as e:
416
  st.write(f"Error processing file {file.name}: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
417
  return text
418
 
419
  def txt2chunks(text):
 
531
  if st.button('💬 Chat'):
532
  st.write('Reasoning with your inputs...')
533
 
 
 
534
  # Divide the user_prompt into smaller sections
535
  user_prompt_sections = divide_prompt(user_prompt, max_length)
536
  full_response = ''
 
607
  create_file(filename, user_prompt, response, should_save)
608
 
609
  st.experimental_rerun()
 
610
 
611
  if __name__ == "__main__":
612
  main()
 
632
  st.markdown('# AI Search Index of Length:' + length + ' Created.') # add timing
633
  filename = generate_filename(raw, 'txt')
634
  create_file(filename, raw, '', should_save)