awacke1 committed on
Commit
e3ecad4
·
verified ·
1 Parent(s): 98fefd9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -6
app.py CHANGED
@@ -62,6 +62,28 @@ if "messages" not in st.session_state:
62
  if st.button("Clear Session"):
63
  st.session_state.messages = []
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  # HTML5 based Speech Synthesis (Text to Speech in Browser)
66
  @st.cache_resource
67
  def SpeechSynthesis(result):
@@ -101,7 +123,7 @@ def load_file(file_name):
101
  content = file.read()
102
  return content
103
 
104
- def extract_urls(text):
105
  try:
106
  date_pattern = re.compile(r'### (\d{2} \w{3} \d{4})')
107
  abs_link_pattern = re.compile(r'\[(.*?)\]\((https://arxiv\.org/abs/\d+\.\d+)\)')
@@ -130,6 +152,49 @@ def extract_urls(text):
130
  st.write('.')
131
  return ''
132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  def download_pdfs(urls):
134
  local_files = []
135
  for url in urls:
@@ -477,13 +542,12 @@ def compare_and_delete_files(files):
477
  # Function to get file size
478
  def get_file_size(file_path):
479
  return os.path.getsize(file_path)
480
-
481
  def FileSidebar():
482
-
483
  # File Sidebar for files 🌐View, 📂Open, ▶️Run, and 🗑Delete per file
484
- all_files = glob.glob("*.md")
485
  all_files = [file for file in all_files if len(os.path.splitext(file)[0]) >= 10] # exclude files with short names
486
- all_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True) # sort by filename length which puts similar prompts together - consider making date and time of file optional.
487
 
488
  # ⬇️ Download
489
  Files1, Files2 = st.sidebar.columns(2)
@@ -504,7 +568,7 @@ def FileSidebar():
504
  for file in all_files:
505
  col1, col2, col3, col4, col5 = st.sidebar.columns([1,6,1,1,1]) # adjust the ratio as needed
506
  with col1:
507
- if st.button("🌐", key="md_"+file): # md emoji button
508
  file_contents = load_file(file)
509
  file_name=file
510
  next_action='md'
@@ -534,6 +598,7 @@ def FileSidebar():
534
  next_action='delete'
535
  st.session_state['next_action'] = next_action
536
 
 
537
 
538
  # 🚩File duplicate detector - useful to prune and view all. Pruning works well by file size detection of two similar and flags the duplicate.
539
  file_sizes = [get_file_size(file) for file in all_files]
 
62
  if st.button("Clear Session"):
63
  st.session_state.messages = []
64
 
65
+
66
+
67
def download_and_save_abstract(url, title):
    """Download an abstract page and save it locally as an HTML file.

    Args:
        url: Address of the abstract page to fetch.
        title: Paper title; used to build the output file name.

    Returns:
        The name of the file written (``<title>_abstract.html``), or ``None``
        when the request fails or does not answer with HTTP 200.
    """
    try:
        # A timeout keeps a stalled server from hanging the app indefinitely.
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        # Network failures are reported the same way as a non-200 answer.
        return None
    if response.status_code != 200:
        return None

    # Titles are free text: map spaces (as before) plus path separators and
    # other characters that are invalid in file names to underscores, so
    # open() cannot fail on them or write outside the working directory.
    unsafe_chars = str.maketrans({ch: '_' for ch in ' \\/:*?"<>|'})
    filename = f"{title.translate(unsafe_chars)}_abstract.html"

    soup = BeautifulSoup(response.text, 'html.parser')
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(str(soup))
    return filename
76
+
77
def download_and_save_pdf(url, title):
    """Download a PDF and save it locally.

    Args:
        url: Address of the PDF to fetch.
        title: Paper title; used to build the output file name.

    Returns:
        The name of the file written (``<title>.pdf``), or ``None`` when the
        request fails or does not answer with HTTP 200.
    """
    try:
        # A timeout keeps a stalled server from hanging the app indefinitely.
        response = requests.get(url, timeout=60)
    except requests.RequestException:
        # Network failures are reported the same way as a non-200 answer.
        return None
    if response.status_code != 200:
        return None

    # Titles are free text: map spaces (as before) plus path separators and
    # other characters that are invalid in file names to underscores, so
    # open() cannot fail on them or write outside the working directory.
    unsafe_chars = str.maketrans({ch: '_' for ch in ' \\/:*?"<>|'})
    filename = f"{title.translate(unsafe_chars)}.pdf"

    with open(filename, 'wb') as f:
        f.write(response.content)
    return filename
85
+
86
+
87
  # HTML5 based Speech Synthesis (Text to Speech in Browser)
88
  @st.cache_resource
89
  def SpeechSynthesis(result):
 
123
  content = file.read()
124
  return content
125
 
126
+ def extract_urls_old(text):
127
  try:
128
  date_pattern = re.compile(r'### (\d{2} \w{3} \d{4})')
129
  abs_link_pattern = re.compile(r'\[(.*?)\]\((https://arxiv\.org/abs/\d+\.\d+)\)')
 
152
  st.write('.')
153
  return ''
154
 
155
def extract_urls(text):
    """Extract paper entries from markdown, download them, and summarize.

    The text is scanned for ``### DD Mon YYYY`` headings, the bracketed title
    following ``### DD Mon YYYY | ``, arXiv abstract links and ``[⬇️]`` arXiv
    PDF links. Matches are paired up by position; for each complete entry the
    abstract page and PDF are downloaded and a markdown section is produced.

    Args:
        text: Markdown text to scan.

    Returns:
        A markdown string with one section per entry (date, title, remote
        links, and local file links for downloads that succeeded), or an
        empty string when nothing was found or an error occurred.
    """
    try:
        date_pattern = re.compile(r'### (\d{2} \w{3} \d{4})')
        abs_link_pattern = re.compile(r'\[(.*?)\]\((https://arxiv\.org/abs/\d+\.\d+)\)')
        pdf_link_pattern = re.compile(r'\[⬇️\]\((https://arxiv\.org/pdf/\d+\.\d+)\)')
        title_pattern = re.compile(r'### \d{2} \w{3} \d{4} \| \[(.*?)\]')

        date_matches = date_pattern.findall(text)
        abs_link_matches = abs_link_pattern.findall(text)
        pdf_link_matches = pdf_link_pattern.findall(text)
        title_matches = title_pattern.findall(text)

        # zip() stops at the shortest list, so an entry missing its title or
        # one of its links no longer raises IndexError and discards the
        # sections already built. NOTE(review): pairing is still purely
        # positional across the four lists.
        entries = zip(date_matches, title_matches, abs_link_matches, pdf_link_matches)

        sections = []
        for date, title, abs_match, pdf_link in entries:
            # abs_link_pattern has two groups: (link text, URL).
            abs_link = abs_match[1]

            # Download and save abstract
            abstract_file = download_and_save_abstract(abs_link, title)

            # Download and save PDF
            pdf_file = download_and_save_pdf(pdf_link, title)

            parts = [
                f"**Date:** {date}\n\n",
                f"**Title:** {title}\n\n",
                f"**Abstract Link:** [{abs_link}]({abs_link})\n\n",
                f"**PDF Link:** [{pdf_link}]({pdf_link})\n\n",
            ]
            if abstract_file:
                parts.append(f"**Local Abstract:** [View Abstract]({abstract_file})\n\n")
            if pdf_file:
                parts.append(f"**Local PDF:** [View PDF]({pdf_file})\n\n")
            parts.append("---\n\n")
            sections.append("".join(parts))

        return "".join(sections)

    except Exception as exc:
        # Best-effort: report the cause instead of swallowing it silently, and
        # let KeyboardInterrupt/SystemExit propagate (a bare except did not).
        st.write(f'Error in extracting URLs: {exc}')
        return ''
195
+
196
+
197
+
198
  def download_pdfs(urls):
199
  local_files = []
200
  for url in urls:
 
542
  # Function to get file size
543
  def get_file_size(file_path):
544
  return os.path.getsize(file_path)
545
+
546
  def FileSidebar():
 
547
  # File Sidebar for files 🌐View, 📂Open, ▶️Run, and 🗑Delete per file
548
+ all_files = glob.glob("*.md") + glob.glob("*_abstract.html") + glob.glob("*.pdf")
549
  all_files = [file for file in all_files if len(os.path.splitext(file)[0]) >= 10] # exclude files with short names
550
+ all_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True) # sort by filename length which puts similar prompts together
551
 
552
  # ⬇️ Download
553
  Files1, Files2 = st.sidebar.columns(2)
 
568
  for file in all_files:
569
  col1, col2, col3, col4, col5 = st.sidebar.columns([1,6,1,1,1]) # adjust the ratio as needed
570
  with col1:
571
+ if st.button("🌐", key="view_"+file): # view emoji button
572
  file_contents = load_file(file)
573
  file_name=file
574
  next_action='md'
 
598
  next_action='delete'
599
  st.session_state['next_action'] = next_action
600
 
601
+
602
 
603
  # 🚩File duplicate detector - useful to prune and view all. Pruning works well by file size detection of two similar and flags the duplicate.
604
  file_sizes = [get_file_size(file) for file in all_files]