awacke1 commited on
Commit
e3d2f3a
·
1 Parent(s): a0feab3

Create backupapp.py

Browse files
Files changed (1) hide show
  1. backupapp.py +634 -0
backupapp.py ADDED
@@ -0,0 +1,634 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import streamlit.components.v1 as components
3
+
4
+ import openai
5
+ import os
6
+ import base64
7
+ import glob
8
+ import io
9
+ import json
10
+ import mistune
11
+ import pytz
12
+ import math
13
+ import requests
14
+ import sys
15
+ import time
16
+ import re
17
+ import textract
18
+ import zipfile
19
+ import random
20
+
21
+ from datetime import datetime
22
+ from openai import ChatCompletion
23
+ from xml.etree import ElementTree as ET
24
+ from bs4 import BeautifulSoup
25
+ from collections import deque
26
+ from audio_recorder_streamlit import audio_recorder
27
+ from dotenv import load_dotenv
28
+ from PyPDF2 import PdfReader
29
+ from langchain.text_splitter import CharacterTextSplitter
30
+ from langchain.embeddings import OpenAIEmbeddings
31
+ from langchain.vectorstores import FAISS
32
+ from langchain.chat_models import ChatOpenAI
33
+ from langchain.memory import ConversationBufferMemory
34
+ from langchain.chains import ConversationalRetrievalChain
35
+ from templates import css, bot_template, user_template
36
+ from io import BytesIO
37
+
38
+
39
# page config and sidebar declares up front allow all other functions to see global class variables
st.set_page_config(page_title="GPT Streamlit Document Reasoner", layout="wide")
# Global save toggle, consulted by create_file() and the chat handlers before writing output files.
should_save = st.sidebar.checkbox("💾 Save", value=True)
42
+
43
+
44
# Read it aloud
def readitaloud(result):
    """Render an HTML widget with a textarea holding `result` and a button
    that speaks the text via the browser SpeechSynthesis API."""
    import html  # local import: only this helper needs escaping

    # BUG FIX: the original interpolated `result` raw into the HTML, so any
    # markup in it (e.g. '</textarea>') broke out of the textarea (HTML
    # injection). Escape it before embedding.
    documentHTML5 = f'''
<!DOCTYPE html>
<html>
<head>
    <title>Read It Aloud</title>
    <script type="text/javascript">
        function readAloud() {{
            const text = document.getElementById("textArea").value;
            const speech = new SpeechSynthesisUtterance(text);
            window.speechSynthesis.speak(speech);
        }}
    </script>
</head>
<body>
    <h1>🔊 Read It Aloud</h1>
    <textarea id="textArea" rows="10" cols="80">
{html.escape(result)}
</textarea>
    <br>
    <button onclick="readAloud()">🔊 Read Aloud</button>
</body>
</html>
'''
    components.html(documentHTML5, width=800, height=300)
74
+
75
# Chat and Chat with files
def chat_with_model(prompt, document_section, model_choice='gpt-3.5-turbo'):
    """Stream a chat completion for `prompt` (plus optional document context).

    Partial output is rendered into a Streamlit placeholder as chunks
    arrive; the finished reply is read aloud and returned.
    """
    conversation = [{'role': 'system', 'content': 'You are a helpful assistant.'}]
    conversation.append({'role': 'user', 'content': prompt})
    if len(document_section) > 0:
        # Supply the document section as prior assistant context.
        conversation.append({'role': 'assistant', 'content': document_section})

    start_time = time.time()
    report = []
    res_box = st.empty()
    collected_messages = []

    openai.api_key = os.getenv('OPENAI_API_KEY')
    # BUG FIX: the original hard-coded 'gpt-3.5-turbo' in this call, silently
    # ignoring the caller's model_choice; honor it.
    for chunk in openai.ChatCompletion.create(
        model=model_choice,
        messages=conversation,
        temperature=0.5,
        stream=True,
    ):
        delta = chunk['choices'][0].get('delta', {})
        collected_messages.append(delta)
        content = delta.get('content')
        # BUG FIX: the final stream chunk carries no content (None); the
        # original hit len(None) and hid it behind a bare except. Guard.
        if content:
            report.append(content)
            result = ''.join(report).strip()
            res_box.markdown(f'*{result}*')

    full_reply_content = ''.join(m.get('content', '') for m in collected_messages)
    st.write("Elapsed time:")
    st.write(time.time() - start_time)
    readitaloud(full_reply_content)
    return full_reply_content
118
+
119
def chat_with_file_contents(prompt, file_content, model_choice='gpt-3.5-turbo'):
    """Single-shot (non-streaming) chat completion with optional file context."""
    messages = [
        {'role': 'system', 'content': 'You are a helpful assistant.'},
        {'role': 'user', 'content': prompt},
    ]
    if len(file_content) > 0:
        messages.append({'role': 'assistant', 'content': file_content})
    reply = openai.ChatCompletion.create(model=model_choice, messages=messages)
    return reply['choices'][0]['message']['content']
126
+
127
+
128
def link_button_with_emoji(url, title, emoji_summary):
    """Render a markdown link decorated with a randomly chosen medical emoji."""
    medical_emojis = ("💉", "🏥", "🌡️", "🩺", "🔬", "💊", "🧪", "👨‍⚕️", "👩‍⚕️")
    chosen = random.choice(medical_emojis)
    st.markdown(f"[{chosen} {emoji_summary} - {title}]({url})")
132
+
133
# Define function to add paper buttons and links
def add_paper_buttons_and_links():
    """Render four paper expanders (MemGPT, AutoGen, Whisper, ChatDev), each
    with an arxiv link and a button that asks the model about the paper's
    feature outline."""
    col1, col2, col3, col4 = st.columns(4)

    with col1:
        with st.expander("MemGPT 🧠💾", expanded=False):
            link_button_with_emoji("https://arxiv.org/abs/2310.08560", "MemGPT", "🧠💾 Memory OS")
            outline_memgpt = "Memory Hierarchy, Context Paging, Self-directed Memory Updates, Memory Editing, Memory Retrieval, Preprompt Instructions, Semantic Memory, Episodic Memory, Emotional Contextual Understanding"
            if st.button("Discuss MemGPT Features"):
                # NOTE(review): "MemGPT" is passed as the document_section arg here.
                chat_with_model("Discuss the key features of MemGPT: " + outline_memgpt, "MemGPT")

    with col2:
        with st.expander("AutoGen 🤖🔗", expanded=False):
            link_button_with_emoji("https://arxiv.org/abs/2308.08155", "AutoGen", "🤖🔗 Multi-Agent LLM")
            outline_autogen = "Cooperative Conversations, Combining Capabilities, Complex Task Solving, Divergent Thinking, Factuality, Highly Capable Agents, Generic Abstraction, Effective Implementation"
            if st.button("Explore AutoGen Multi-Agent LLM"):
                chat_with_model("Explore the key features of AutoGen: " + outline_autogen, "AutoGen")

    with col3:
        with st.expander("Whisper 🔊🧑‍🚀", expanded=False):
            link_button_with_emoji("https://arxiv.org/abs/2212.04356", "Whisper", "🔊🧑‍🚀 Robust STT")
            outline_whisper = "Scaling, Deep Learning Approaches, Weak Supervision, Zero-shot Transfer Learning, Accuracy & Robustness, Pre-training Techniques, Broad Range of Environments, Combining Multiple Datasets"
            if st.button("Learn About Whisper STT"):
                chat_with_model("Learn about the key features of Whisper: " + outline_whisper, "Whisper")

    with col4:
        with st.expander("ChatDev 💬💻", expanded=False):
            link_button_with_emoji("https://arxiv.org/pdf/2307.07924.pdf", "ChatDev", "💬💻 Comm. Agents")
            outline_chatdev = "Effective Communication, Comprehensive Software Solutions, Diverse Social Identities, Tailored Codes, Environment Dependencies, User Manuals"
            if st.button("Deep Dive into ChatDev"):
                chat_with_model("Deep dive into the features of ChatDev: " + outline_chatdev, "ChatDev")

# Render the paper buttons at import time (runs on every Streamlit rerun).
add_paper_buttons_and_links()
166
+
167
+
168
# Process user input is a post processor algorithm which runs after document
# embedding vector DB play of GPT on context of documents..
def process_user_input(user_question):
    """Run `user_question` through the conversational retrieval chain, render
    the chat history, save the final answer, and offer drill-down buttons."""
    # BUG FIX: the original seeded st.session_state.conversation with a plain
    # dict, which is not callable and crashed on the next line. The chain is
    # only created once documents have been processed, so bail out gracefully
    # until it exists.
    conversation = st.session_state.get('conversation')
    if not callable(conversation):
        st.warning("Please upload and process documents before asking questions.")
        return

    response = conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']

    message = None
    for i, message in enumerate(st.session_state.chat_history):
        # Alternate user / bot bubbles.
        template = user_template if i % 2 == 0 else bot_template
        st.write(template.replace("{{MSG}}", message.content), unsafe_allow_html=True)

    # BUG FIX: the original used `message` unconditionally after the loop,
    # raising NameError on an empty history.
    if message is not None:
        # Save file output from PDF query results (last message only).
        filename = generate_filename(user_question, 'txt')
        create_file(filename, user_question, message.content, should_save)

        # New functionality to create expanders and buttons
        create_expanders_and_buttons(message.content)
187
+
188
def create_expanders_and_buttons(content):
    """For every 'Header: detail' paragraph in `content`, show an expander
    with a button that asks the model to elaborate on that feature."""
    for block in content.split("\n\n"):
        header, detail = extract_feature_and_detail(block)
        if not (header and detail):
            continue
        with st.expander(header, expanded=False):
            if st.button(f"Explore {header}"):
                expanded_outline = "Expand on the feature: " + detail
                chat_with_model(expanded_outline, header)
199
+
200
def extract_feature_and_detail(paragraph):
    """Parse a 'Header: detail' line into (header, detail).

    Returns (None, None) when the first line holds no colon separator.
    """
    # NOTE: '.' does not cross newlines, so only the paragraph's first line
    # is inspected — same contract as the original.
    found = re.match(r"(.*?):(.*)", paragraph)
    if found is None:
        return None, None
    return found.group(1).strip(), found.group(2).strip()
208
+
209
def generate_filename(prompt, file_type):
    """Build a safe filename '<MMDD_HHMM>_<slug>.<file_type>'.

    The timestamp is US/Central; the slug keeps only alphanumerics and
    underscores from `prompt`, capped at 90 characters.
    """
    # Stdlib zoneinfo (3.9+) replaces the third-party pytz dependency here;
    # behavior is identical for the US/Central lookup.
    from zoneinfo import ZoneInfo
    stamp = datetime.now(ZoneInfo('US/Central')).strftime("%m%d_%H%M")
    slug = prompt.replace(" ", "_").replace("\n", "_")
    slug = "".join(ch for ch in slug if ch.isalnum() or ch == "_")[:90]
    return f"{stamp}_{slug}.{file_type}"
215
+
216
def transcribe_audio(file_path, model):
    """POST an audio file to the OpenAI transcription endpoint, chat about
    the transcript, save the prompt/response pair, and return the
    transcript — or None when the API call fails."""
    key = os.getenv('OPENAI_API_KEY')
    headers = {
        "Authorization": f"Bearer {key}",
    }
    OPENAI_API_URL = "https://api.openai.com/v1/audio/transcriptions"
    with open(file_path, 'rb') as f:
        # BUG FIX: the original passed the literal string "Read file {file_path}"
        # to st.write (missing f-string prefix).
        st.write(f"Read file {file_path}")
        response = requests.post(OPENAI_API_URL, headers=headers,
                                 files={'file': f}, data={'model': model})
    if response.status_code == 200:
        st.write(response.json())
        transcript = response.json().get('text')
        chatResponse = chat_with_model(transcript, '')
        filename = generate_filename(transcript, 'txt')
        create_file(filename, transcript, chatResponse, should_save)
        return transcript
    else:
        st.write(response.json())
        st.error("Error in API call.")
        return None
242
+
243
def save_and_play_audio(audio_recorder):
    """Record audio via the supplied recorder widget, persist it as a .wav
    file and play it back. Returns the filename, or None when nothing was
    recorded."""
    recorded = audio_recorder()
    if not recorded:
        return None
    wav_name = generate_filename("Recording", "wav")
    with open(wav_name, 'wb') as out:
        out.write(recorded)
    st.audio(recorded, format="audio/wav")
    return wav_name
252
+
253
+
254
# Define a context dictionary to maintain the state between exec calls
context = {}

def create_file(filename, prompt, response, should_save=True):
    """Persist a prompt/response pair as '<base>.md'.

    Python fenced code blocks found in `response` are executed (their stdout
    is captured into the document); other fenced blocks are appended
    verbatim. No-op when `should_save` is False.
    """
    if not should_save:
        return

    # Extract base filename without extension
    base_filename, ext = os.path.splitext(filename)

    combined_content = "# Prompt 📝\n" + prompt + "\n\n"
    combined_content += "# Response 💬\n" + response + "\n\n"

    # Walk every fenced ``` block in the response.
    for resource in re.findall(r"```([\s\S]*?)```", response):
        if "python" in resource.lower():
            # Remove the 'python' language tag from the fence body.
            cleaned_code = re.sub(r'^\s*python', '', resource,
                                  flags=re.IGNORECASE | re.MULTILINE)
            combined_content += "# Code Results 🚀\n"

            # Redirect stdout to capture whatever the generated code prints.
            original_stdout = sys.stdout
            sys.stdout = io.StringIO()
            try:
                # SECURITY: exec of model-generated code is inherently
                # dangerous; `context` deliberately persists state between
                # calls. Do not feed untrusted responses through this path.
                exec(cleaned_code, context)
                code_output = sys.stdout.getvalue()
                combined_content += f"```\n{code_output}\n```\n\n"
                st.write("# Code Results 🚀\n" + "```" + code_output + "```\n\n")
            except Exception as e:
                combined_content += f"```python\nError executing Python code: {e}\n```\n\n"
            finally:
                # BUG FIX: restore stdout in a finally so it cannot stay
                # redirected if something other than Exception escapes.
                sys.stdout = original_stdout
        else:
            # Add non-Python resources with markdown and emoji
            combined_content += "# Resource 🛠️\n" + "```" + resource + "```\n\n"

    # should_save was already checked above; the original re-checked it here
    # redundantly. Write with an explicit encoding.
    with open(f"{base_filename}.md", 'w', encoding='utf-8') as file:
        file.write(combined_content)
309
+
310
+
311
def truncate_document(document, length):
    """Return at most the first `length` characters of `document`."""
    return document[0:length]
313
+
314
def divide_document(document, max_length):
    """Split `document` into consecutive chunks of at most `max_length` chars."""
    chunks = []
    start = 0
    while start < len(document):
        chunks.append(document[start:start + max_length])
        start += max_length
    return chunks
316
+
317
def get_table_download_link(file_path):
    """Return an HTML anchor that downloads `file_path` inline as base64.

    On any read failure the bare path is returned instead of a link (same
    best-effort contract as before, now also covering open() errors).
    """
    # Conservative extension map; spreadsheet/CSV types are deliberately
    # served as text/plain, matching the original behavior.
    mime_types = {
        '.txt': 'text/plain',
        '.py': 'text/plain',
        '.xlsx': 'text/plain',
        '.csv': 'text/plain',
        '.htm': 'text/html',
        '.html': 'text/html',  # BUG FIX: '.html' previously fell through to octet-stream
        '.md': 'text/markdown',
    }
    try:
        with open(file_path, 'r') as file:
            data = file.read()
    except Exception:
        # BUG FIX: the original only guarded file.read(); a missing file
        # raised out of open() instead of falling back to the bare path.
        st.write('')
        return file_path
    b64 = base64.b64encode(data.encode()).decode()
    file_name = os.path.basename(file_path)
    ext = os.path.splitext(file_name)[1]  # get the file extension
    mime_type = mime_types.get(ext, 'application/octet-stream')
    return f'<a href="data:{mime_type};base64,{b64}" target="_blank" download="{file_name}">{file_name}</a>'
343
+
344
def CompressXML(xml_text):
    """Return `xml_text` re-serialized with comment-like elements removed.

    Any element whose tag contains the substring 'Comment' is dropped.
    """
    root = ET.fromstring(xml_text)
    # BUG FIX: ElementTree elements have no .parent attribute, so the
    # original 'elem.parent.remove(elem)' raised AttributeError whenever a
    # matching element existed. Build a child->parent map and remove via it.
    parent_of = {child: parent for parent in root.iter() for child in parent}
    for elem in list(root.iter()):
        if isinstance(elem.tag, str) and 'Comment' in elem.tag:
            parent = parent_of.get(elem)
            if parent is not None:
                parent.remove(elem)
    return ET.tostring(root, encoding='unicode', method="xml")
350
+
351
def read_file_content(file, max_length):
    """Convert an uploaded file to text according to its MIME type.

    `max_length` is accepted for interface compatibility but unused here.
    Unknown types yield an empty string.
    """
    mime = file.type
    if mime == "application/json":
        return str(json.load(file))
    if mime in ("text/html", "text/htm"):
        soup = BeautifulSoup(file, "html.parser")
        return soup.text
    if mime in ("application/xml", "text/xml"):
        root = ET.parse(file).getroot()
        return CompressXML(ET.tostring(root, encoding='unicode'))
    if mime in ("text/markdown", "text/md"):
        render = mistune.create_markdown()
        return render(file.read().decode())
    if mime == "text/plain":
        return file.getvalue().decode()
    return ""
371
+
372
def extract_mime_type(file):
    """Return the MIME type of `file`.

    Strings are parsed for a type='...' fragment (an UploadedFile repr);
    objects are expected to expose a .type attribute. Raises ValueError for
    unparsable strings and TypeError for unsupported inputs.
    """
    if isinstance(file, str):
        pattern = r"type='(.*?)'"
        match = re.search(pattern, file)
        if match:
            return match.group(1)
        raise ValueError(f"Unable to extract MIME type from {file}")
    # BUG FIX: the original checked isinstance(file, streamlit.UploadedFile),
    # but the module is imported as `st` and that attribute path does not
    # exist, so the branch raised NameError. Duck-type on .type instead.
    if hasattr(file, "type"):
        return file.type
    raise TypeError("Input should be a string or a streamlit.UploadedFile object")
386
+
387
+
388
+
389
def extract_file_extension(file):
    """Return everything after the first '.' in the uploaded file's name.

    Raises ValueError when the name contains no dot.
    """
    file_name = file.name
    found = re.search(r".*?\.(.*?)$", file_name)
    if not found:
        raise ValueError(f"Unable to extract file extension from {file_name}")
    return found.group(1)
398
+
399
def pdf2txt(docs):
    """Concatenate the text of every uploaded document in `docs`.

    Plain-text-ish files are decoded as UTF-8; PDFs go through PyPDF2.
    Per-file failures are reported to the UI and skipped.
    """
    text = ""
    text_like = {'py', 'txt', 'html', 'htm', 'xml', 'json'}
    for file in docs:
        file_extension = extract_file_extension(file)
        # print the file extension
        st.write(f"File type extension: {file_extension}")
        # read the file according to its extension
        try:
            ext = file_extension.lower()
            if ext in text_like:
                text += file.getvalue().decode('utf-8')
            elif ext == 'pdf':
                from PyPDF2 import PdfReader
                reader = PdfReader(BytesIO(file.getvalue()))
                for page in reader.pages:
                    text += page.extract_text()  # new PyPDF2 syntax
        except Exception as e:
            st.write(f"Error processing file {file.name}: {e}")
    return text
418
+
419
def txt2chunks(text):
    """Split `text` into overlapping ~1000-character chunks for embedding."""
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    return splitter.split_text(text)
422
+
423
def vector_store(text_chunks):
    """Embed `text_chunks` with OpenAI embeddings into a FAISS index."""
    api_key = os.getenv('OPENAI_API_KEY')
    return FAISS.from_texts(
        texts=text_chunks,
        embedding=OpenAIEmbeddings(openai_api_key=api_key),
    )
427
+
428
def get_chain(vectorstore):
    """Build a conversational retrieval chain over `vectorstore` with
    buffered chat history."""
    history = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
    return ConversationalRetrievalChain.from_llm(
        llm=ChatOpenAI(),
        retriever=vectorstore.as_retriever(),
        memory=history,
    )
432
+
433
def divide_prompt(prompt, max_length):
    """Greedily pack whitespace-separated words of `prompt` into chunks whose
    character length stays within `max_length` (a single word longer than the
    limit gets a chunk of its own)."""
    chunks = []
    current_chunk = []
    current_length = 0
    for word in prompt.split():
        if len(word) + current_length <= max_length:
            current_length += len(word) + 1  # +1 accounts for the joining space
            current_chunk.append(word)
        else:
            # BUG FIX: only flush a non-empty chunk; the original emitted a
            # spurious leading '' chunk when the first word exceeded max_length.
            if current_chunk:
                chunks.append(' '.join(current_chunk))
            current_chunk = [word]
            current_length = len(word)
    # BUG FIX: the original appended unconditionally, yielding [''] for an
    # empty prompt.
    if current_chunk:
        chunks.append(' '.join(current_chunk))
    return chunks
448
+
449
def create_zip_of_files(files):
    """Bundle every path in `files` into 'all_files.zip'; return the archive name."""
    zip_name = "all_files.zip"
    with zipfile.ZipFile(zip_name, 'w') as archive:
        for path in files:
            archive.write(path)
    return zip_name
458
+
459
+
460
def get_zip_download_link(zip_file):
    """Return an HTML anchor that downloads `zip_file` as inline base64 data."""
    with open(zip_file, 'rb') as handle:
        payload = handle.read()
    encoded = base64.b64encode(payload).decode()
    return f'<a href="data:application/zip;base64,{encoded}" download="{zip_file}">Download All</a>'
469
+
470
+
471
def main():
    """Top-level Streamlit page: settings, audio capture/transcription,
    document upload with per-section chat, and a sidebar file manager.

    NOTE(review): original indentation was lost in extraction; nesting below
    is reconstructed from the control flow — verify against the repo copy.
    """
    col1, col2, col3, col4 = st.columns(4)

    with col1:
        with st.expander("Settings 🧠💾", expanded=False):
            # File type for output, model choice
            menu = ["txt", "htm", "xlsx", "csv", "md", "py"]
            choice = st.sidebar.selectbox("Output File Type:", menu)
            model_choice = st.sidebar.radio("Select Model:", ('gpt-3.5-turbo', 'gpt-3.5-turbo-0301'))

    # Audio, transcribe, GPT:
    filename = save_and_play_audio(audio_recorder)
    if filename is not None:
        try:
            transcription = transcribe_audio(filename, "whisper-1")
        except:
            st.write(' ')
        st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
        filename = None

    # prompt interfaces
    user_prompt = st.text_area("Enter prompts, instructions & questions:", '', height=100)

    # file section interface for prompts against large documents as context
    collength, colupload = st.columns([2, 3])  # adjust the ratio as needed
    with collength:
        max_length = st.slider("File section length for large files", min_value=1000, max_value=128000, value=12000, step=1000)
    with colupload:
        uploaded_file = st.file_uploader("Add a file for context:", type=["pdf", "xml", "json", "xlsx", "csv", "html", "htm", "md", "txt"])

    # Document section chat
    document_sections = deque()
    document_responses = {}  # section index -> model response already shown
    if uploaded_file is not None:
        file_content = read_file_content(uploaded_file, max_length)
        document_sections.extend(divide_document(file_content, max_length))
    if len(document_sections) > 0:
        if st.button("👁️ View Upload"):
            st.markdown("**Sections of the uploaded file:**")
            for i, section in enumerate(list(document_sections)):
                st.markdown(f"**Section {i+1}**\n{section}")
        st.markdown("**Chat with the model:**")
        for i, section in enumerate(list(document_sections)):
            if i in document_responses:
                st.markdown(f"**Section {i+1}**\n{document_responses[i]}")
            else:
                if st.button(f"Chat about Section {i+1}"):
                    st.write('Reasoning with your inputs...')
                    response = chat_with_model(user_prompt, section, model_choice)
                    st.write('Response:')
                    st.write(response)
                    document_responses[i] = response
                    filename = generate_filename(f"{user_prompt}_section_{i+1}", choice)
                    create_file(filename, user_prompt, response, should_save)
                    st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)

    if st.button('💬 Chat'):
        st.write('Reasoning with your inputs...')
        # Divide the user_prompt into smaller sections
        user_prompt_sections = divide_prompt(user_prompt, max_length)
        full_response = ''
        for prompt_section in user_prompt_sections:
            # Process each section with the model
            response = chat_with_model(prompt_section, ''.join(list(document_sections)), model_choice)
            full_response += response + '\n'  # Combine the responses
        response = full_response
        st.write('Response:')
        st.write(response)
        filename = generate_filename(user_prompt, choice)
        create_file(filename, user_prompt, response, should_save)
        st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)

    all_files = glob.glob("*.*")
    all_files = [file for file in all_files if len(os.path.splitext(file)[0]) >= 20]  # exclude files with short names
    all_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True)  # sort by file type and file name in descending order

    # Sidebar buttons Download All and Delete All
    colDownloadAll, colDeleteAll = st.sidebar.columns([3, 3])
    with colDownloadAll:
        if st.button("⬇️ Download All"):
            zip_file = create_zip_of_files(all_files)
            st.markdown(get_zip_download_link(zip_file), unsafe_allow_html=True)
    with colDeleteAll:
        if st.button("🗑 Delete All"):
            for file in all_files:
                os.remove(file)
            st.experimental_rerun()

    # Sidebar of Files Saving History and surfacing files as context of prompts and responses
    file_contents = ''
    next_action = ''
    for file in all_files:
        col1, col2, col3, col4, col5 = st.sidebar.columns([1, 6, 1, 1, 1])  # adjust the ratio as needed
        with col1:
            if st.button("🌐", key="md_" + file):  # md emoji button
                with open(file, 'r') as f:
                    file_contents = f.read()
                next_action = 'md'
        with col2:
            st.markdown(get_table_download_link(file), unsafe_allow_html=True)
        with col3:
            if st.button("📂", key="open_" + file):  # open emoji button
                with open(file, 'r') as f:
                    file_contents = f.read()
                next_action = 'open'
        with col4:
            if st.button("🔍", key="read_" + file):  # search emoji button
                with open(file, 'r') as f:
                    file_contents = f.read()
                next_action = 'search'
        with col5:
            if st.button("🗑", key="delete_" + file):
                os.remove(file)
                st.experimental_rerun()

    # Act on whichever per-file button (if any) was clicked above.
    if len(file_contents) > 0:
        if next_action == 'open':
            file_content_area = st.text_area("File Contents:", file_contents, height=500)
        if next_action == 'md':
            st.markdown(file_contents)
        if next_action == 'search':
            file_content_area = st.text_area("File Contents:", file_contents, height=500)
            st.write('Reasoning with your inputs...')
            response = chat_with_model(user_prompt, file_contents, model_choice)
            filename = generate_filename(file_contents, choice)
            create_file(filename, user_prompt, response, should_save)
            st.experimental_rerun()
610
+
611
if __name__ == "__main__":
    main()

# NOTE(review): everything below runs at import time on every Streamlit
# rerun, after main() — including env loading and the document-upload
# sidebar. Presumably intentional for this app's layout; confirm.
load_dotenv()
st.write(css, unsafe_allow_html=True)

st.header("Chat with documents :books:")
user_question = st.text_input("Ask a question about your documents:")
if user_question:
    process_user_input(user_question)

with st.sidebar:
    st.subheader("Your documents")
    docs = st.file_uploader("import documents", accept_multiple_files=True)
    with st.spinner("Processing"):
        raw = pdf2txt(docs)
        if len(raw) > 0:
            length = str(len(raw))
            text_chunks = txt2chunks(raw)
            vectorstore = vector_store(text_chunks)
            # The retrieval chain consumed by process_user_input().
            st.session_state.conversation = get_chain(vectorstore)
            st.markdown('# AI Search Index of Length:' + length + ' Created.')  # add timing
            filename = generate_filename(raw, 'txt')
            create_file(filename, raw, '', should_save)