ScienceBrain.AI

Running

App Files Community

awacke1 committed on Jul 9, 2024

Commit

e3ecad4

verified ·

1 Parent(s): 98fefd9

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -6

app.py CHANGED Viewed

@@ -62,6 +62,28 @@ if "messages" not in st.session_state:
 if st.button("Clear Session"):
     st.session_state.messages = []
 # HTML5 based Speech Synthesis (Text to Speech in Browser)
 @st.cache_resource
 def SpeechSynthesis(result):
@@ -101,7 +123,7 @@ def load_file(file_name):
         content = file.read()
     return content
-def extract_urls(text):
     try:
         date_pattern = re.compile(r'### (\d{2} \w{3} \d{4})')
         abs_link_pattern = re.compile(r'\[(.*?)\]\((https://arxiv\.org/abs/\d+\.\d+)\)')
@@ -130,6 +152,49 @@ def extract_urls(text):
         st.write('.')
         return ''
 def download_pdfs(urls):
     local_files = []
     for url in urls:
@@ -477,13 +542,12 @@ def compare_and_delete_files(files):
 # Function to get file size
 def get_file_size(file_path):
     return os.path.getsize(file_path)
 def FileSidebar():
     # File Sidebar for files 🌐View, 📂Open, ▶️Run, and 🗑Delete per file
-    all_files = glob.glob("*.md")
     all_files = [file for file in all_files if len(os.path.splitext(file)[0]) >= 10]  # exclude files with short names
-    all_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True)  # sort by filename length which puts similar prompts together - consider making date and time of file optional.
     # ⬇️ Download
     Files1, Files2 = st.sidebar.columns(2)
@@ -504,7 +568,7 @@ def FileSidebar():
     for file in all_files:
         col1, col2, col3, col4, col5 = st.sidebar.columns([1,6,1,1,1])  # adjust the ratio as needed
         with col1:
-            if st.button("🌐", key="md_"+file):  # md emoji button
                 file_contents = load_file(file)
                 file_name=file
                 next_action='md'
@@ -534,6 +598,7 @@ def FileSidebar():
                 next_action='delete'
                 st.session_state['next_action'] = next_action
     # 🚩File duplicate detector - useful to prune and view all.  Pruning works well by file size detection of two similar and flags the duplicate.
     file_sizes = [get_file_size(file) for file in all_files]

 if st.button("Clear Session"):
     st.session_state.messages = []
+def download_and_save_abstract(url, title):
+    response = requests.get(url)
+    if response.status_code == 200:
+        soup = BeautifulSoup(response.text, 'html.parser')
+        filename = f"{title.replace(' ', '_')}_abstract.html"
+        with open(filename, 'w', encoding='utf-8') as f:
+            f.write(str(soup))
+        return filename
+    return None
+def download_and_save_pdf(url, title):
+    response = requests.get(url)
+    if response.status_code == 200:
+        filename = f"{title.replace(' ', '_')}.pdf"
+        with open(filename, 'wb') as f:
+            f.write(response.content)
+        return filename
+    return None
 # HTML5 based Speech Synthesis (Text to Speech in Browser)
 @st.cache_resource
 def SpeechSynthesis(result):
         content = file.read()
     return content
+def extract_urls_old(text):
     try:
         date_pattern = re.compile(r'### (\d{2} \w{3} \d{4})')
         abs_link_pattern = re.compile(r'\[(.*?)\]\((https://arxiv\.org/abs/\d+\.\d+)\)')
         st.write('.')
         return ''
+def extract_urls(text):
+    try:
+        date_pattern = re.compile(r'### (\d{2} \w{3} \d{4})')
+        abs_link_pattern = re.compile(r'\[(.*?)\]\((https://arxiv\.org/abs/\d+\.\d+)\)')
+        pdf_link_pattern = re.compile(r'\[⬇️\]\((https://arxiv\.org/pdf/\d+\.\d+)\)')
+        title_pattern = re.compile(r'### \d{2} \w{3} \d{4} \| \[(.*?)\]')
+        date_matches = date_pattern.findall(text)
+        abs_link_matches = abs_link_pattern.findall(text)
+        pdf_link_matches = pdf_link_pattern.findall(text)
+        title_matches = title_pattern.findall(text)
+        markdown_text = ""
+        for i in range(len(date_matches)):
+            date = date_matches[i]
+            title = title_matches[i]
+            abs_link = abs_link_matches[i][1]
+            pdf_link = pdf_link_matches[i]
+            # Download and save abstract
+            abstract_file = download_and_save_abstract(abs_link, title)
+            # Download and save PDF
+            pdf_file = download_and_save_pdf(pdf_link, title)
+            markdown_text += f"**Date:** {date}\n\n"
+            markdown_text += f"**Title:** {title}\n\n"
+            markdown_text += f"**Abstract Link:** [{abs_link}]({abs_link})\n\n"
+            markdown_text += f"**PDF Link:** [{pdf_link}]({pdf_link})\n\n"
+            if abstract_file:
+                markdown_text += f"**Local Abstract:** [View Abstract]({abstract_file})\n\n"
+            if pdf_file:
+                markdown_text += f"**Local PDF:** [View PDF]({pdf_file})\n\n"
+            markdown_text += "---\n\n"
+        return markdown_text
+    except:
+        st.write('Error in extracting URLs')
+        return ''
 def download_pdfs(urls):
     local_files = []
     for url in urls:
 # Function to get file size
 def get_file_size(file_path):
     return os.path.getsize(file_path)
 def FileSidebar():
     # File Sidebar for files 🌐View, 📂Open, ▶️Run, and 🗑Delete per file
+    all_files = glob.glob("*.md") + glob.glob("*_abstract.html") + glob.glob("*.pdf")
     all_files = [file for file in all_files if len(os.path.splitext(file)[0]) >= 10]  # exclude files with short names
+    all_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True)  # sort by (extension, filename) in descending order, which groups files of the same type together
     # ⬇️ Download
     Files1, Files2 = st.sidebar.columns(2)
     for file in all_files:
         col1, col2, col3, col4, col5 = st.sidebar.columns([1,6,1,1,1])  # adjust the ratio as needed
         with col1:
+            if st.button("🌐", key="view_"+file):  # view emoji button
                 file_contents = load_file(file)
                 file_name=file
                 next_action='md'
                 next_action='delete'
                 st.session_state['next_action'] = next_action
     # 🚩File duplicate detector - useful to prune and view all.  Pruning works well by file size detection of two similar and flags the duplicate.
     file_sizes = [get_file_size(file) for file in all_files]