Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -62,6 +62,28 @@ if "messages" not in st.session_state:
|
|
62 |
if st.button("Clear Session"):
|
63 |
st.session_state.messages = []
|
64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
# HTML5 based Speech Synthesis (Text to Speech in Browser)
|
66 |
@st.cache_resource
|
67 |
def SpeechSynthesis(result):
|
@@ -101,7 +123,7 @@ def load_file(file_name):
|
|
101 |
content = file.read()
|
102 |
return content
|
103 |
|
104 |
-
def
|
105 |
try:
|
106 |
date_pattern = re.compile(r'### (\d{2} \w{3} \d{4})')
|
107 |
abs_link_pattern = re.compile(r'\[(.*?)\]\((https://arxiv\.org/abs/\d+\.\d+)\)')
|
@@ -130,6 +152,49 @@ def extract_urls(text):
|
|
130 |
st.write('.')
|
131 |
return ''
|
132 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
def download_pdfs(urls):
|
134 |
local_files = []
|
135 |
for url in urls:
|
@@ -477,13 +542,12 @@ def compare_and_delete_files(files):
|
|
477 |
# Function to get file size
|
478 |
def get_file_size(file_path):
    """Return the size in bytes of the file at ``file_path``.

    Thin wrapper around ``os.path.getsize``; any ``OSError`` it raises
    (for example when the path does not exist) propagates to the caller.
    """
    return os.path.getsize(file_path)
|
480 |
-
|
481 |
def FileSidebar():
|
482 |
-
|
483 |
# File Sidebar for files 🌐View, 📂Open, ▶️Run, and 🗑Delete per file
|
484 |
-
all_files = glob.glob("*.md")
|
485 |
all_files = [file for file in all_files if len(os.path.splitext(file)[0]) >= 10] # exclude files with short names
|
486 |
-
all_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True) # sort by filename length which puts similar prompts together
|
487 |
|
488 |
# ⬇️ Download
|
489 |
Files1, Files2 = st.sidebar.columns(2)
|
@@ -504,7 +568,7 @@ def FileSidebar():
|
|
504 |
for file in all_files:
|
505 |
col1, col2, col3, col4, col5 = st.sidebar.columns([1,6,1,1,1]) # adjust the ratio as needed
|
506 |
with col1:
|
507 |
-
if st.button("🌐", key="
|
508 |
file_contents = load_file(file)
|
509 |
file_name=file
|
510 |
next_action='md'
|
@@ -534,6 +598,7 @@ def FileSidebar():
|
|
534 |
next_action='delete'
|
535 |
st.session_state['next_action'] = next_action
|
536 |
|
|
|
537 |
|
538 |
# 🚩 File duplicate detector - useful for pruning and for viewing all files. Pruning works by file-size detection: two similar files are compared by size and the duplicate is flagged.
|
539 |
file_sizes = [get_file_size(file) for file in all_files]
|
|
|
62 |
if st.button("Clear Session"):
|
63 |
st.session_state.messages = []
|
64 |
|
65 |
+
|
66 |
+
|
67 |
+
def download_and_save_abstract(url, title, timeout=30):
    """Fetch the page at ``url`` and save it as ``<title>_abstract.html``.

    The response body is parsed with BeautifulSoup and the re-serialized
    HTML is written, UTF-8 encoded, to a path relative to the current
    working directory.

    Args:
        url: Address of the abstract page to download.
        title: Title used to build the file name. Spaces become
            underscores; characters that cannot appear in a file name
            are replaced by underscores as well.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The name of the file written, or ``None`` when the request fails
        or the server does not answer with HTTP 200.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # A network failure is reported the same way as a non-200 answer.
        return None
    if response.status_code != 200:
        return None

    # Keep the file in the working directory: a "/" or "\" in the title
    # would otherwise be treated as a directory separator by open().
    safe_title = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', title.replace(' ', '_'))
    filename = f"{safe_title}_abstract.html"

    soup = BeautifulSoup(response.text, 'html.parser')
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(str(soup))
    return filename
|
76 |
+
|
77 |
+
def download_and_save_pdf(url, title, timeout=30):
    """Download the file at ``url`` and save it as ``<title>.pdf``.

    The raw response bytes are written unchanged to a path relative to
    the current working directory.

    Args:
        url: Address of the PDF to download.
        title: Title used to build the file name. Spaces become
            underscores; characters that cannot appear in a file name
            are replaced by underscores as well.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The name of the file written, or ``None`` when the request fails
        or the server does not answer with HTTP 200.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # A network failure is reported the same way as a non-200 answer.
        return None
    if response.status_code != 200:
        return None

    # Keep the file in the working directory: a "/" or "\" in the title
    # would otherwise be treated as a directory separator by open().
    safe_title = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', title.replace(' ', '_'))
    filename = f"{safe_title}.pdf"

    with open(filename, 'wb') as f:
        f.write(response.content)
    return filename
|
85 |
+
|
86 |
+
|
87 |
# HTML5 based Speech Synthesis (Text to Speech in Browser)
|
88 |
@st.cache_resource
|
89 |
def SpeechSynthesis(result):
|
|
|
123 |
content = file.read()
|
124 |
return content
|
125 |
|
126 |
+
def extract_urls_old(text):
|
127 |
try:
|
128 |
date_pattern = re.compile(r'### (\d{2} \w{3} \d{4})')
|
129 |
abs_link_pattern = re.compile(r'\[(.*?)\]\((https://arxiv\.org/abs/\d+\.\d+)\)')
|
|
|
152 |
st.write('.')
|
153 |
return ''
|
154 |
|
155 |
+
def extract_urls(text):
    """Build a markdown summary of the arXiv entries found in ``text``.

    Four regular expressions are run over ``text``: one for ``### DD Mon
    YYYY`` date headings, one for ``[label](https://arxiv.org/abs/...)``
    links, one for ``[⬇️](https://arxiv.org/pdf/...)`` links and one for
    the bracketed title that follows ``### <date> | ``. Matches are paired
    by position. For every pair the abstract page and the PDF are
    downloaded via ``download_and_save_abstract`` and
    ``download_and_save_pdf``, and a markdown section is emitted with the
    date, title, remote links and, when a download produced a file, links
    to the local copies.

    Args:
        text: Markdown text to scan.

    Returns:
        The concatenated markdown sections (empty when nothing matches),
        or ``''`` after writing an error message to the page if an
        exception is raised while processing.
    """
    try:
        date_pattern = re.compile(r'### (\d{2} \w{3} \d{4})')
        abs_link_pattern = re.compile(r'\[(.*?)\]\((https://arxiv\.org/abs/\d+\.\d+)\)')
        pdf_link_pattern = re.compile(r'\[⬇️\]\((https://arxiv\.org/pdf/\d+\.\d+)\)')
        title_pattern = re.compile(r'### \d{2} \w{3} \d{4} \| \[(.*?)\]')

        date_matches = date_pattern.findall(text)
        abs_link_matches = abs_link_pattern.findall(text)
        pdf_link_matches = pdf_link_pattern.findall(text)
        title_matches = title_pattern.findall(text)

        # zip() stops at the shortest list, so an entry that lacks one of the
        # four pieces ends the pairing instead of raising IndexError and
        # discarding everything collected so far.
        entries = zip(date_matches, title_matches, abs_link_matches, pdf_link_matches)

        sections = []
        for date, title, abs_match, pdf_link in entries:
            # abs_link_pattern has two groups: (link text, URL).
            abs_link = abs_match[1]

            # Download and save abstract
            abstract_file = download_and_save_abstract(abs_link, title)

            # Download and save PDF
            pdf_file = download_and_save_pdf(pdf_link, title)

            sections.append(f"**Date:** {date}\n\n")
            sections.append(f"**Title:** {title}\n\n")
            sections.append(f"**Abstract Link:** [{abs_link}]({abs_link})\n\n")
            sections.append(f"**PDF Link:** [{pdf_link}]({pdf_link})\n\n")

            if abstract_file:
                sections.append(f"**Local Abstract:** [View Abstract]({abstract_file})\n\n")
            if pdf_file:
                sections.append(f"**Local PDF:** [View PDF]({pdf_file})\n\n")

            sections.append("---\n\n")
        return "".join(sections)

    except Exception:
        # Deliberately not a bare ``except:`` so that KeyboardInterrupt and
        # SystemExit are not swallowed here.
        st.write('Error in extracting URLs')
        return ''
|
195 |
+
|
196 |
+
|
197 |
+
|
198 |
def download_pdfs(urls):
|
199 |
local_files = []
|
200 |
for url in urls:
|
|
|
542 |
# Function to get file size
|
543 |
def get_file_size(file_path):
    """Return the size in bytes of the file at ``file_path``.

    Thin wrapper around ``os.path.getsize``; any ``OSError`` it raises
    (for example when the path does not exist) propagates to the caller.
    """
    return os.path.getsize(file_path)
|
545 |
+
|
546 |
def FileSidebar():
|
|
|
547 |
# File Sidebar for files 🌐View, 📂Open, ▶️Run, and 🗑Delete per file
|
548 |
+
all_files = glob.glob("*.md") + glob.glob("*_abstract.html") + glob.glob("*.pdf")
|
549 |
all_files = [file for file in all_files if len(os.path.splitext(file)[0]) >= 10] # exclude files with short names
|
550 |
+
all_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True) # sort by filename length which puts similar prompts together
|
551 |
|
552 |
# ⬇️ Download
|
553 |
Files1, Files2 = st.sidebar.columns(2)
|
|
|
568 |
for file in all_files:
|
569 |
col1, col2, col3, col4, col5 = st.sidebar.columns([1,6,1,1,1]) # adjust the ratio as needed
|
570 |
with col1:
|
571 |
+
if st.button("🌐", key="view_"+file): # view emoji button
|
572 |
file_contents = load_file(file)
|
573 |
file_name=file
|
574 |
next_action='md'
|
|
|
598 |
next_action='delete'
|
599 |
st.session_state['next_action'] = next_action
|
600 |
|
601 |
+
|
602 |
|
603 |
# 🚩 File duplicate detector - useful for pruning and for viewing all files. Pruning works by file-size detection: two similar files are compared by size and the duplicate is flagged.
|
604 |
file_sizes = [get_file_size(file) for file in all_files]
|