Spaces:

HUBioDataLab
/

ProtHGT

Running

App Files Community

Erva Ulusoy committed on Feb 13

Commit

9a145bd

1 Parent(s): 988f84d

added hyperlinks to uniprot and go ids

Browse files

Files changed (2) hide show

ProtHGT_app.py +27 -12
run_prothgt_app.py +2 -2

ProtHGT_app.py CHANGED Viewed

@@ -331,12 +331,17 @@ if st.session_state.submitted:
         col1, col2, col3, col4 = st.columns(4)
         with col1:
             # Protein filter
             selected_protein = st.selectbox(
                 "Filter by Protein",
-                options=['All'] + sorted(st.session_state.predictions_df['Protein'].unique().tolist())
             )
         with col2:
             # GO category filter
             selected_category = st.selectbox(
@@ -374,13 +379,13 @@ if st.session_state.submitted:
         filtered_df = st.session_state.predictions_df.copy()
         if selected_protein != 'All':
-            filtered_df = filtered_df[filtered_df['Protein'] == selected_protein]
         if selected_category != 'All':
             filtered_df = filtered_df[filtered_df['GO_category'] == selected_category]
         if go_term_filter:
-            filtered_df = filtered_df[filtered_df['GO_term'].str.contains(go_term_filter, case=False, na=False)]
         filtered_df = filtered_df[(filtered_df['Probability'] >= min_probability_threshold) &
                                 (filtered_df['Probability'] <= max_probability_threshold)]
@@ -429,12 +434,23 @@ if st.session_state.submitted:
         start_idx = st.session_state.page_number * rows_per_page
         end_idx = min(start_idx + rows_per_page, total_rows)
-        # Display the paginated dataframe with increased width
         st.dataframe(
             filtered_df.iloc[start_idx:end_idx],
             hide_index=True,
-            use_container_width=True,  # This makes the table use full width
             column_config={
                 "Probability": st.column_config.ProgressColumn(
                     "Probability",
                     format="%.2f",
@@ -443,7 +459,7 @@ if st.session_state.submitted:
                 ),
                 "Protein": st.column_config.TextColumn(
                     "Protein",
-                    help="UniProt ID",
                 ),
                 "GO_category": st.column_config.TextColumn(
                     "GO Category",
@@ -451,15 +467,14 @@ if st.session_state.submitted:
                 ),
                 "GO_term": st.column_config.TextColumn(
                     "GO Term",
-                    help="Gene Ontology Term ID",
                 ),
             }
         )
         # Pagination controls with better layout
         col1, col2, col3 = st.columns([1, 3, 1])
         with col1:
-            if st.button("⬅️ Previous", disabled=st.session_state.page_number == 0):
                 st.session_state.page_number -= 1
                 st.rerun()
@@ -472,7 +487,7 @@ if st.session_state.submitted:
             """, unsafe_allow_html=True)
         with col3:
-            if st.button("Next ➡️", disabled=st.session_state.page_number >= total_pages - 1):
                 st.session_state.page_number += 1
                 st.rerun()

         col1, col2, col3, col4 = st.columns(4)
         with col1:
+            # Extract UniProt IDs from URLs for the selectbox
+            uniprot_ids = st.session_state.predictions_df['UniProt_ID'].apply(
+                lambda x: x.split('/')[-2]  # Gets the ID part from the URL
+            ).unique().tolist()
             # Protein filter
             selected_protein = st.selectbox(
                 "Filter by Protein",
+                options=['All'] + sorted(uniprot_ids)
             )
         with col2:
             # GO category filter
             selected_category = st.selectbox(
         filtered_df = st.session_state.predictions_df.copy()
         if selected_protein != 'All':
+            filtered_df = filtered_df[filtered_df['UniProt_ID'].str.contains(selected_protein)]
         if selected_category != 'All':
             filtered_df = filtered_df[filtered_df['GO_category'] == selected_category]
         if go_term_filter:
+            filtered_df = filtered_df[filtered_df['GO_ID'].str.contains(go_term_filter, case=False, na=False)]
         filtered_df = filtered_df[(filtered_df['Probability'] >= min_probability_threshold) &
                                 (filtered_df['Probability'] <= max_probability_threshold)]
         start_idx = st.session_state.page_number * rows_per_page
         end_idx = min(start_idx + rows_per_page, total_rows)
         st.dataframe(
             filtered_df.iloc[start_idx:end_idx],
             hide_index=True,
+            use_container_width=True,
             column_config={
+                "UniProt_ID": st.column_config.LinkColumn(
+                    "UniProt ID",
+                    help="Click to view protein in UniProt",
+                    validate="^https://www\\.uniprot\\.org/uniprotkb/[A-Z0-9]+/entry$",
+                    display_text="^https://www\\.uniprot\\.org/uniprotkb/([A-Z0-9]+)/entry$"
+                ),
+                "GO_ID": st.column_config.LinkColumn(
+                    "GO ID",
+                    help="Click to view GO term in QuickGO",
+                    validate="^https://www\\.ebi\\.ac\\.uk/QuickGO/term/GO:[0-9]+$",
+                    display_text="^https://www\\.ebi\\.ac\\.uk/QuickGO/term/(GO:[0-9]+)$"
+                ),
                 "Probability": st.column_config.ProgressColumn(
                     "Probability",
                     format="%.2f",
                 ),
                 "Protein": st.column_config.TextColumn(
                     "Protein",
+                    help="Protein Name",
                 ),
                 "GO_category": st.column_config.TextColumn(
                     "GO Category",
                 ),
                 "GO_term": st.column_config.TextColumn(
                     "GO Term",
+                    help="Gene Ontology Term Name",
                 ),
             }
         )
         # Pagination controls with better layout
         col1, col2, col3 = st.columns([1, 3, 1])
         with col1:
+            if st.button("Previous", disabled=st.session_state.page_number == 0):
                 st.session_state.page_number -= 1
                 st.rerun()
             """, unsafe_allow_html=True)
         with col3:
+            if st.button("Next", disabled=st.session_state.page_number >= total_pages - 1):
                 st.session_state.page_number += 1
                 st.rerun()

run_prothgt_app.py CHANGED Viewed

@@ -130,9 +130,9 @@ def _create_prediction_df(predictions, heterodata, protein_ids, go_category):
     # Create DataFrame
     prediction_df = pd.DataFrame({
-        'UniProt_ID': all_proteins,
         'Protein': all_protein_names,
-        'GO_ID': all_go_terms,
         'GO_term': all_go_term_names,
         'GO_category': all_categories,
         'Probability': all_probabilities

     # Create DataFrame
     prediction_df = pd.DataFrame({
+        'UniProt_ID': [f"https://www.uniprot.org/uniprotkb/{pid}/entry" for pid in all_proteins],
         'Protein': all_protein_names,
+        'GO_ID': [f"https://www.ebi.ac.uk/QuickGO/term/{go_id}" for go_id in all_go_terms],
         'GO_term': all_go_term_names,
         'GO_category': all_categories,
         'Probability': all_probabilities