Update app.py

app.py CHANGED
@@ -53,7 +53,14 @@ st.set_page_config(
     }
 )
 
-
+client = OpenAI(api_key= os.getenv('OPENAI_API_KEY'), organization=os.getenv('OPENAI_ORG_ID'))
+MODEL = "gpt-4o-2024-05-13"
+if "openai_model" not in st.session_state:
+    st.session_state["openai_model"] = MODEL
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+if st.button("Clear Session"):
+    st.session_state.messages = []
 
 # HTML5 based Speech Synthesis (Text to Speech in Browser)
 @st.cache_resource
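Reviewer note: this hunk moves the OpenAI client, model constant, and chat history to module scope, right after `st.set_page_config`. Streamlit re-executes the whole script on every interaction, so only values kept in `st.session_state` survive reruns. A minimal sketch of the pattern (assumes `streamlit` and `openai>=1.0` installed and the key in the environment):

```python
import os
import streamlit as st
from openai import OpenAI

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))  # re-created each rerun; cheap and stateless
MODEL = "gpt-4o-2024-05-13"

if "messages" not in st.session_state:  # guard so history survives reruns
    st.session_state.messages = []

if st.button("Clear Session"):  # True only on the rerun triggered by the click
    st.session_state.messages = []
```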
@@ -359,6 +366,19 @@ def display_glossary_grid(roleplaying_glossary):
         st.markdown(f"**{term}** <small>{links_md}</small>", unsafe_allow_html=True)
 
 
+# ChatBot client chat completions ------------------------- !!
+def process_text2(MODEL='gpt-4o-2024-05-13', text_input='What is 2+2 and what is an imaginary number'):
+    if text_input:
+        completion = client.chat.completions.create(
+            model=MODEL,
+            messages=st.session_state.messages
+        )
+        return_text = completion.choices[0].message.content
+        st.write("Assistant: " + return_text)
+        filename = generate_filename(text_input, "md")
+        create_file(filename, text_input, return_text, should_save)
+        return return_text
+
 @st.cache_resource
 def get_table_download_link(file_path):
 
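Reviewer note: in `process_text2`, `text_input` gates the call and names the saved file, but it is never appended to `st.session_state.messages`, so the completion only sees the new prompt if the caller appended it first (the ChatBot entry at the bottom of the file does). A self-contained sketch that does not depend on that side effect:

```python
def process_text2(MODEL='gpt-4o-2024-05-13', text_input=''):
    if not text_input:
        return None
    # Append the prompt ourselves instead of relying on the caller to do it.
    st.session_state.messages.append({"role": "user", "content": text_input})
    completion = client.chat.completions.create(
        model=MODEL,
        messages=st.session_state.messages,
    )
    return_text = completion.choices[0].message.content
    st.write("Assistant: " + return_text)
    return return_text
```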
@@ -576,10 +596,26 @@ def FileSidebar():
 
         if next_action=='md':
             st.markdown(file_contents)
+            SpeechSynthesis(file_contents)
+
             buttonlabel = '🚀Run'
             if st.button(key='Runmd', label = buttonlabel):
-
-
+                MODEL = "gpt-4o-2024-05-13"
+                openai.api_key = os.getenv('OPENAI_API_KEY')
+                openai.organization = os.getenv('OPENAI_ORG_ID')
+                client = OpenAI(api_key= os.getenv('OPENAI_API_KEY'), organization=os.getenv('OPENAI_ORG_ID'))
+                st.session_state.messages.append({"role": "user", "content": transcript})
+                with st.chat_message("user"):
+                    st.markdown(transcript)
+                with st.chat_message("assistant"):
+                    completion = client.chat.completions.create(
+                        model=MODEL,
+                        messages = st.session_state.messages,
+                        stream=True
+                    )
+                response = process_text2(text_input=prompt)
+                st.session_state.messages.append({"role": "assistant", "content": response})
+                #try:
                 #search_glossary(file_contents)
                 #except:
                 #st.markdown('GPT is sleeping. Restart ETA 30 seconds.')
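Reviewer note: the new Run-md handler references `transcript` and `prompt`, neither of which is defined inside `FileSidebar`, and it opens a streaming completion that is never consumed, so clicking 🚀Run would raise `NameError` before anything streams. A sketch of the likely intent, using the loaded `file_contents` as the user turn (assumes Streamlit ≥ 1.31 for `st.write_stream`):

```python
st.session_state.messages.append({"role": "user", "content": file_contents})
with st.chat_message("assistant"):
    stream = client.chat.completions.create(
        model=MODEL,
        messages=st.session_state.messages,
        stream=True,
    )
    response = st.write_stream(stream)  # renders chunks live, returns the full text
st.session_state.messages.append({"role": "assistant", "content": response})
```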
@@ -752,7 +788,7 @@ def display_videos_and_links(num_columns):
         display_glossary_entity(k)
         col_index += 1  # Increment column index to place the next video in the next column
 
-@st.cache_resource
+#@st.cache_resource
 def display_images_and_wikipedia_summaries(num_columns=4):
     image_files = [f for f in os.listdir('.') if f.endswith('.png')]
     if not image_files:
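Reviewer note: commenting out the decorator here is the right call. `st.cache_resource` returns the cached result and skips the function body on later runs, which would stop this gallery from ever re-rendering. The decorator is meant for long-lived resources, roughly:

```python
@st.cache_resource  # body runs once per process, not once per rerun
def get_openai_client():
    return OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
```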
@@ -1242,35 +1278,6 @@ def get_audio_download_link(file_path):
 
 
 
-
-# 🎵 Wav Audio files - Transcription History in Wav
-all_files = glob.glob("*.wav")
-all_files = [file for file in all_files if len(os.path.splitext(file)[0]) >= 10]  # exclude files with short names
-all_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True)  # sort by file type and file name in descending order
-
-filekey = 'delall'
-if st.sidebar.button("🗑 Delete All Audio", key=filekey):
-    for file in all_files:
-        os.remove(file)
-    st.rerun()
-
-for file in all_files:
-    col1, col2 = st.sidebar.columns([6, 1])  # adjust the ratio as needed
-    with col1:
-        st.markdown(file)
-        if st.button("🎵", key="play_" + file):  # play emoji button
-            audio_file = open(file, 'rb')
-            audio_bytes = audio_file.read()
-            st.audio(audio_bytes, format='audio/wav')
-            #st.markdown(get_audio_download_link(file), unsafe_allow_html=True)
-            #st.text_input(label="", value=file)
-    with col2:
-        if st.button("🗑", key="delete_" + file):
-            os.remove(file)
-            st.rerun()
-
-
-
 GiveFeedback=False
 if GiveFeedback:
     with st.expander("Give your feedback 👍", expanded=False):
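Reviewer note: this sidebar history block is not gone; it reappears at the bottom of the file (in the `main()` hunk below), generalized to wav, png, and mp4. The detail worth keeping in mind is the per-file `key=` arguments: Streamlit requires a unique key for every widget created inside a loop, hence `"play_" + file` and `"delete_" + file`. Condensed form of the loop:

```python
for file in all_files:
    col1, col2 = st.sidebar.columns([6, 1])
    with col1:
        st.markdown(file)
        if st.button("🎵", key="play_" + file):  # unique key per file
            st.audio(open(file, "rb").read(), format="audio/wav")
    with col2:
        if st.button("🗑", key="delete_" + file):
            os.remove(file)
            st.rerun()  # refresh the list after deletion
```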
@@ -1336,18 +1343,6 @@ def transcribe_canary(filename):
     st.write(result)
     return result
 
-# ChatBot client chat completions ------------------------- !!
-def process_text2(MODEL='gpt-4o-2024-05-13', text_input='What is 2+2 and what is an imaginary number'):
-    if text_input:
-        completion = client.chat.completions.create(
-            model=MODEL,
-            messages=st.session_state.messages
-        )
-        return_text = completion.choices[0].message.content
-        st.write("Assistant: " + return_text)
-        filename = generate_filename(text_input, "md")
-        create_file(filename, text_input, return_text, should_save)
-        return return_text
 
 # Transcript to arxiv and client chat completion ------------------------- !!
 filename = save_and_play_audio(audio_recorder)
@@ -1397,12 +1392,12 @@ if example_input:
 for example_input in session_state["search_queries"]:
     st.write(example_input)
 
-    if st.button("Run Prompt", help="Click to run."):
-        try:
-            response=StreamLLMChatResponse(example_input)
-            create_file(filename, example_input, response, should_save)
-        except:
-            st.write('model is asleep. Starting now on A10 GPU. Please wait one minute then retry. KEDA triggered.')
+    #if st.button("Run Prompt", help="Click to run."):
+    #    try:
+    #        response=StreamLLMChatResponse(example_input)
+    #        create_file(filename, example_input, response, should_save)
+    #    except:
+    #        st.write('model is asleep. Starting now on A10 GPU. Please wait one minute then retry. KEDA triggered.')
 
 openai.api_key = os.getenv('OPENAI_API_KEY')
 if openai.api_key == None: openai.api_key = st.secrets['OPENAI_API_KEY']
@@ -1446,7 +1441,7 @@ if AddAFileForContext:
     st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
 
 
-# documentation
+# GPT4o documentation
 # 1. Cookbook: https://cookbook.openai.com/examples/gpt4o/introduction_to_gpt4o
 # 2. Configure your Project and Orgs to limit/allow Models: https://platform.openai.com/settings/organization/general
 # 3. Watch your Billing! https://platform.openai.com/settings/organization/billing/overview
@@ -1487,17 +1482,36 @@ def process_text(text_input):
 
     #st.write("Assistant: " + completion.choices[0].message.content)
 
+def create_file(filename, prompt, response, is_image=False):
+    with open(filename, "w", encoding="utf-8") as f:
+        f.write(prompt + "\n\n" + response)
 
-
-
-
-
-
+def save_image_old2(image, filename):
+    with open(filename, "wb") as f:
+        f.write(image.getbuffer())
+
+# Now filename length protected for linux and windows filename lengths
+def save_image(image, filename):
+    max_filename_length = 250
+    filename_stem, extension = os.path.splitext(filename)
+    truncated_stem = filename_stem[:max_filename_length - len(extension)] if len(filename) > max_filename_length else filename_stem
+    filename = f"{truncated_stem}{extension}"
     with open(filename, "wb") as f:
-        f.write(
+        f.write(image.getbuffer())
     return filename
+
+def extract_boldface_terms(text):
+    return re.findall(r'\*\*(.*?)\*\*', text)
+
+def extract_title(text):
+    boldface_terms = re.findall(r'\*\*(.*?)\*\*', text)
+    if boldface_terms:
+        title = ' '.join(boldface_terms)
+    else:
+        title = re.sub(r'[^a-zA-Z0-9_\-]', ' ', text[-200:])
+    return title[-200:]
 
-def process_image(image_input):
+def process_image(image_input, user_prompt):
     if image_input:
         st.markdown('Processing image: ' + image_input.name )
     if image_input:
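Reviewer note: the truncation guard in `save_image` keeps stem plus extension within the 250-character budget, safely under the 255-byte filename limits on ext4 and Windows. One caveat: the new `create_file` takes `is_image` as its fourth parameter, while existing call sites pass `should_save` in that position; the parameter is unused, so it is harmless but misleading. A quick self-check of the truncation logic in isolation:

```python
import os

def truncate_filename(filename, max_len=250):
    stem, ext = os.path.splitext(filename)
    if len(filename) > max_len:
        stem = stem[:max_len - len(ext)]  # leave room for the extension
    return f"{stem}{ext}"

assert len(truncate_filename("a" * 300 + ".png")) == 250
assert truncate_filename("short.png") == "short.png"  # untouched when under the limit
```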
@@ -1507,7 +1521,7 @@ def process_image(image_input):
         messages=[
             {"role": "system", "content": "You are a helpful assistant that responds in Markdown."},
             {"role": "user", "content": [
-                {"type": "text", "text":
+                {"type": "text", "text": user_prompt},
                 {"type": "image_url", "image_url": {
                     "url": f"data:image/png;base64,{base64_image}"}
                 }
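Reviewer note: for reference, this is the shape of the vision payload `process_image` builds — the upload is base64-encoded earlier in the function and embedded as a `data:` URL, with the user's prompt in a sibling text part. A standalone sketch (the filename is illustrative):

```python
import base64

with open("example.png", "rb") as f:  # illustrative file
    b64 = base64.b64encode(f.read()).decode("utf-8")

user_content = [
    {"type": "text", "text": "Describe this picture."},
    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64}"}},
]
```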
@@ -1528,79 +1542,65 @@ def process_image(image_input):
     with open(filename_md, "w", encoding="utf-8") as f:
         f.write(image_response)
 
-    #
-
-
+    # Extract boldface terms from image_response then autoname save file
+    #boldface_terms = extract_boldface_terms(image_response)
+    boldface_terms = extract_title(image_response).replace(':','')
+    filename_stem, extension = os.path.splitext(image_input.name)
+    filename_img = f"{filename_stem} {''.join(boldface_terms)}{extension}"
+    newfilename = save_image(image_input, filename_img)
+    filename_md = newfilename.replace('.png', '.md')
+    create_file(filename_md, '', image_response, True)
 
     return image_response
 
-def
-
-
-
-
-
-
-    if image_input:
-        base64_image = base64.b64encode(image_input.read()).decode("utf-8")
-        response = client.chat.completions.create(
-            model=MODEL,
-            messages=[
-                {"role": "system", "content": "You are a helpful assistant that responds in Markdown."},
-                {"role": "user", "content": [
-                    {"type": "text", "text": "Help me understand what is in this picture and list ten facts as markdown outline with appropriate emojis that describes what you see."},
-                    {"type": "image_url", "image_url": {
-                        "url": f"data:image/png;base64,{base64_image}"}
-                    }
-                ]}
-            ],
-            temperature=0.0,
-        )
-        image_response = response.choices[0].message.content
-        st.markdown(image_response)
-
-        filename_txt = generate_filename(image_response, "md")  # Save markdown on image AI output from gpt4o
-        create_file(filename_txt, image_response, '', True)  #create_file() # create_file() 3 required positional arguments: 'filename', 'prompt', and 'response'
-
-        filename_txt = generate_filename(image_response, "png")
-        save_image(image_input, filename_txt)  # Save copy of image with new filename
-        #st.rerun()  # rerun to show new image and new markdown files
-
-        return image_response
-
+def create_audio_file(filename, audio_data, should_save):
+    if should_save:
+        with open(filename, "wb") as file:
+            file.write(audio_data.getvalue())
+        st.success(f"Audio file saved as {filename}")
+    else:
+        st.warning("Audio file not saved.")
 
-def process_audio(audio_input):
+def process_audio(audio_input, text_input):
     if audio_input:
         transcription = client.audio.transcriptions.create(
             model="whisper-1",
             file=audio_input,
         )
-
-
-
-
-
-
-
-
-
+        st.session_state.messages.append({"role": "user", "content": transcription.text})
+        with st.chat_message("assistant"):
+            st.markdown(transcription.text)
+
+        SpeechSynthesis(transcription.text)
+        filename = generate_filename(transcription.text, "wav")
+
+        create_audio_file(filename, audio_input, should_save)
+
+        #SpeechSynthesis(transcription.text)
+
+        filename = generate_filename(transcription.text, "md")
+        create_file(filename, transcription.text, transcription.text, should_save)
+        #st.markdown(response.choices[0].message.content)
 
 def process_audio_for_video(video_input):
     if video_input:
+        try:
+            transcription = client.audio.transcriptions.create(
+                model="whisper-1",
+                file=video_input,
+            )
+            response = client.chat.completions.create(
+                model=MODEL,
+                messages=[
+                    {"role": "system", "content":"""You are generating a transcript summary. Create a summary of the provided transcription. Respond in Markdown."""},
+                    {"role": "user", "content": [{"type": "text", "text": f"The audio transcription is: {transcription}"}],}
+                ],
+                temperature=0,
+            )
+            st.markdown(response.choices[0].message.content)
+            return response.choices[0].message.content
+        except:
+            st.write('No transcript')
 
 def save_video(video_file):
     # Save the uploaded video file
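Reviewer note: two details worth flagging in the new audio path. `process_audio` appends the transcription as a `user` message but renders it inside the `assistant` chat bubble, and `process_audio_for_video` interpolates the whole `transcription` object into the prompt rather than `transcription.text`, so the object's repr leaks into the model input. A sketch of the presumably intended calls:

```python
transcription = client.audio.transcriptions.create(model="whisper-1", file=audio_input)

st.session_state.messages.append({"role": "user", "content": transcription.text})
with st.chat_message("user"):  # match the bubble to the role
    st.markdown(transcription.text)

prompt_text = f"The audio transcription is: {transcription.text}"  # .text, not the object
```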
@@ -1631,10 +1631,16 @@ def process_video(video_path, seconds_per_frame=2):
 
     # Extract audio from video
     audio_path = f"{base_video_path}.mp3"
-
-
-
-
+    try:
+        clip = VideoFileClip(video_path)
+
+        clip.audio.write_audiofile(audio_path, bitrate="32k")
+        clip.audio.close()
+
+        clip.close()
+    except:
+        st.write('No audio track found, moving on..')
+
 
     print(f"Extracted {len(base64Frames)} frames")
     print(f"Extracted audio to {audio_path}")
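Reviewer note: the bare `except` here exists because a video without an audio track has `clip.audio is None`; an explicit check keeps the fallback from swallowing unrelated errors. A sketch (assumes `moviepy` is installed):

```python
from moviepy.editor import VideoFileClip

clip = VideoFileClip(video_path)  # video_path from the surrounding function
if clip.audio is not None:
    clip.audio.write_audiofile(audio_path, bitrate="32k")
    clip.audio.close()
else:
    st.write('No audio track found, moving on..')
clip.close()
```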
@@ -1669,8 +1675,9 @@ def process_audio_and_video(video_input):
     results = response.choices[0].message.content
     st.markdown(results)
 
-
-
+    if transcript:
+        filename = generate_filename(transcript, "md")
+        create_file(filename, transcript, results, should_save)
 
 
 
@@ -1683,53 +1690,139 @@ def main():
         if (text_input > ''):
             textResponse = process_text(text_input)
     elif option == "Image":
+        text = "Help me understand what is in this picture and list ten facts as markdown outline with appropriate emojis that describes what you see."
+        text_input = st.text_input(label="Enter text prompt to use with Image context.", value=text)
         image_input = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
-        image_response = process_image(image_input)
+        image_response = process_image(image_input, text_input)
 
+    elif option == "Audio":
+        text = "You are generating a transcript summary. Create a summary of the provided transcription. Respond in Markdown."
+        text_input = st.text_input(label="Enter text prompt to use with Audio context.", value=text)
+        uploaded_files = st.file_uploader("Upload an audio file", type=["mp3", "wav"], accept_multiple_files=True)
+
+        for audio_input in uploaded_files:
+            st.write(audio_input.name)
+            if audio_input is not None:
+                process_audio(audio_input, text_input)
+
+    elif option == "Audio old":
+        #text = "Transcribe and answer questions as a helpful audio music and speech assistant. "
+        text = "You are generating a transcript summary. Create a summary of the provided transcription. Respond in Markdown."
+        text_input = st.text_input(label="Enter text prompt to use with Audio context.", value=text)
+
+        uploaded_files = st.file_uploader("Upload an audio file", type=["mp3", "wav"], accept_multiple_files=True)
+        for audio_input in uploaded_files:
+            st.write(audio_input.name)
+
+            if audio_input is not None:
+                # To read file as bytes:
+                bytes_data = uploaded_file.getvalue()
+                #st.write(bytes_data)
+
+                # To convert to a string based IO:
+                #stringio = StringIO(uploaded_file.getvalue().decode("utf-8"))
+                #st.write(stringio)
+
+                # To read file as string:
+                #string_data = stringio.read()
+                #st.write(string_data)
 
+                process_audio(audio_input, text_input)
 
-    elif option == "Audio":
-        audio_input = st.file_uploader("Upload an audio file", type=["mp3", "wav"])
-        process_audio(audio_input)
     elif option == "Video":
         video_input = st.file_uploader("Upload a video file", type=["mp4"])
         process_audio_and_video(video_input)
 
-    # Image and Video Galleries
-    num_columns_images=st.slider(key="num_columns_images", label="Choose Number of Image Columns", min_value=1, max_value=15, value=5)
-    display_images_and_wikipedia_summaries(num_columns_images) # Image Jump Grid
 
-
-
+    # Enter the GPT-4o omni model in streamlit chatbot
+    current_messages=[]
+    for message in st.session_state.messages:
+        with st.chat_message(message["role"]):
+            current_messages.append(message)
+            st.markdown(message["content"])
 
 
-    # Optional UI's
-    showExtendedTextInterface=False
-    if showExtendedTextInterface:
-        display_glossary_grid(roleplaying_glossary) # Word Glossary Jump Grid - Dynamically calculates columns based on details length to keep topic together
-        num_columns_text=st.slider(key="num_columns_text", label="Choose Number of Text Columns", min_value=1, max_value=15, value=4)
-        display_buttons_with_scores(num_columns_text) # Feedback Jump Grid
-        st.markdown(personality_factors)
 
+# 🎵 Wav Audio files - Transcription History in Wav
+audio_files = glob.glob("*.wav")
+audio_files = [file for file in audio_files if len(os.path.splitext(file)[0]) >= 10]  # exclude files with short names
+audio_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True)  # sort by file type and file name in descending order
 
+# 🖼 PNG Image files
+image_files = glob.glob("*.png")
+image_files = [file for file in image_files if len(os.path.splitext(file)[0]) >= 10]  # exclude files with short names
+image_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True)  # sort by file type and file name in descending order
+
+# 🎥 MP4 Video files
+video_files = glob.glob("*.mp4")
+video_files = [file for file in video_files if len(os.path.splitext(file)[0]) >= 10]  # exclude files with short names
+video_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True)  # sort by file type and file name in descending order
+
+
+
+
+main()
+
+# Delete All button for each file type
+if st.sidebar.button("🗑 Delete All Audio"):
+    for file in audio_files:
+        os.remove(file)
+    st.rerun()
+
+if st.sidebar.button("🗑 Delete All Images"):
+    for file in image_files:
+        os.remove(file)
+    st.rerun()
+
+if st.sidebar.button("🗑 Delete All Videos"):
+    for file in video_files:
+        os.remove(file)
+    st.rerun()
+
+# Display and handle audio files
+for file in audio_files:
+    col1, col2 = st.sidebar.columns([6, 1])  # adjust the ratio as needed
+    with col1:
+        st.markdown(file)
+        if st.button("🎵", key="play_" + file):  # play emoji button
+            audio_file = open(file, 'rb')
+            audio_bytes = audio_file.read()
+            st.audio(audio_bytes, format='audio/wav')
+    with col2:
+        if st.button("🗑", key="delete_" + file):
+            os.remove(file)
+            st.rerun()
+
+# Display and handle image files
+for file in image_files:
+    col1, col2 = st.sidebar.columns([6, 1])  # adjust the ratio as needed
+    with col1:
+        st.markdown(file)
+        if st.button("🖼", key="show_" + file):  # show emoji button
+            image = open(file, 'rb').read()
+            st.image(image)
+    with col2:
+        if st.button("🗑", key="delete_" + file):
+            os.remove(file)
+            st.rerun()
+
+# Display and handle video files
+for file in video_files:
+    col1, col2 = st.sidebar.columns([6, 1])  # adjust the ratio as needed
+    with col1:
+        st.markdown(file)
+        if st.button("🎥", key="play_" + file):  # play emoji button
+            video_file = open(file, 'rb')
+            video_bytes = video_file.read()
+            st.video(video_bytes)
+    with col2:
+        if st.button("🗑", key="delete_" + file):
+            os.remove(file)
+            st.rerun()
 
 
 
 
-# st.title("GPT-4o ChatBot")
 
-client = OpenAI(api_key= os.getenv('OPENAI_API_KEY'), organization=os.getenv('OPENAI_ORG_ID'))
-MODEL = "gpt-4o-2024-05-13"
-if "openai_model" not in st.session_state:
-    st.session_state["openai_model"] = MODEL
-if "messages" not in st.session_state:
-    st.session_state.messages = []
-if st.button("Clear Session"):
-    st.session_state.messages = []
 
-current_messages=[]
-for message in st.session_state.messages:
-    with st.chat_message(message["role"]):
-        current_messages.append(message)
-        st.markdown(message["content"])
 
 # ChatBot Entry
 if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with?"):
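Reviewer note: in the retained "Audio old" branch the loop variable is `audio_input`, but the body reads `uploaded_file.getvalue()`; unless `uploaded_file` happens to exist from other code, that line raises `NameError`. The likely intent:

```python
for audio_input in uploaded_files:
    st.write(audio_input.name)
    if audio_input is not None:
        bytes_data = audio_input.getvalue()  # read the current upload, not `uploaded_file`
        process_audio(audio_input, text_input)
```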
@@ -1745,5 +1838,28 @@ if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with
         response = process_text2(text_input=prompt)
     st.session_state.messages.append({"role": "assistant", "content": response})
 
-
-
+
+
+
+
+# Image and Video Galleries
+num_columns_images=st.slider(key="num_columns_images", label="Choose Number of Image Columns", min_value=1, max_value=15, value=3)
+display_images_and_wikipedia_summaries(num_columns_images) # Image Jump Grid
+
+num_columns_video=st.slider(key="num_columns_video", label="Choose Number of Video Columns", min_value=1, max_value=15, value=3)
+display_videos_and_links(num_columns_video) # Video Jump Grid
+
+
+# Optional UI's
+showExtendedTextInterface=False
+if showExtendedTextInterface:
+    display_glossary_grid(roleplaying_glossary) # Word Glossary Jump Grid - Dynamically calculates columns based on details length to keep topic together
+    num_columns_text=st.slider(key="num_columns_text", label="Choose Number of Text Columns", min_value=1, max_value=15, value=4)
+    display_buttons_with_scores(num_columns_text) # Feedback Jump Grid
+    st.markdown(personality_factors)
+
+
+
+
+#if __name__ == "__main__":
+