Daniel Tse
committed on
Commit
·
4fa56af
1
Parent(s):
4bce6ba
Remove max_len and min_len
Browse files
app.py
CHANGED
@@ -9,6 +9,9 @@ from nltk import sent_tokenize
|
|
9 |
nltk.download('punkt')
|
10 |
|
11 |
|
|
|
|
|
|
|
12 |
def transcribe_audio(audiofile):
|
13 |
|
14 |
st.session_state['audio'] = audiofile
|
@@ -73,10 +76,17 @@ def summarize_podcast(audiotranscription):
|
|
73 |
st.info("Chunking text")
|
74 |
text_chunks = chunk_and_preprocess_text(audiotranscription)
|
75 |
|
76 |
-
summarized_text = summarizer(text_chunks, max_len=200,min_len=50)
|
|
|
77 |
st.session_state['summary'] = summarized_text
|
78 |
return summarized_text
|
79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
st.markdown("# Podcast Q&A")
|
82 |
|
|
|
9 |
nltk.download('punkt')
|
10 |
|
11 |
|
12 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
13 |
+
|
14 |
+
|
15 |
def transcribe_audio(audiofile):
|
16 |
|
17 |
st.session_state['audio'] = audiofile
|
|
|
76 |
st.info("Chunking text")
|
77 |
text_chunks = chunk_and_preprocess_text(audiotranscription)
|
78 |
|
79 |
+
#summarized_text = summarizer(text_chunks, max_len=200,min_len=50)
|
80 |
+
summarized_text = summarizer(text_chunks)
|
81 |
st.session_state['summary'] = summarized_text
|
82 |
return summarized_text
|
83 |
+
|
84 |
+
def prepare_text_for_qa(audiotranscription):
|
85 |
+
|
86 |
+
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=20)
|
87 |
+
documents = text_splitter.split_documents(audiotranscription)
|
88 |
+
revalue = ""
|
89 |
+
return revalue
|
90 |
|
91 |
st.markdown("# Podcast Q&A")
|
92 |
|