Erva Ulusoy committed on
Commit
9a145bd
·
1 Parent(s): 988f84d

Added hyperlinks to UniProt and GO IDs

Browse files
Files changed (2) hide show
  1. ProtHGT_app.py +27 -12
  2. run_prothgt_app.py +2 -2
ProtHGT_app.py CHANGED
@@ -331,12 +331,17 @@ if st.session_state.submitted:
331
  col1, col2, col3, col4 = st.columns(4)
332
 
333
  with col1:
 
 
 
 
 
334
  # Protein filter
335
  selected_protein = st.selectbox(
336
  "Filter by Protein",
337
- options=['All'] + sorted(st.session_state.predictions_df['Protein'].unique().tolist())
338
  )
339
-
340
  with col2:
341
  # GO category filter
342
  selected_category = st.selectbox(
@@ -374,13 +379,13 @@ if st.session_state.submitted:
374
  filtered_df = st.session_state.predictions_df.copy()
375
 
376
  if selected_protein != 'All':
377
- filtered_df = filtered_df[filtered_df['Protein'] == selected_protein]
378
-
379
  if selected_category != 'All':
380
  filtered_df = filtered_df[filtered_df['GO_category'] == selected_category]
381
 
382
  if go_term_filter:
383
- filtered_df = filtered_df[filtered_df['GO_term'].str.contains(go_term_filter, case=False, na=False)]
384
 
385
  filtered_df = filtered_df[(filtered_df['Probability'] >= min_probability_threshold) &
386
  (filtered_df['Probability'] <= max_probability_threshold)]
@@ -429,12 +434,23 @@ if st.session_state.submitted:
429
  start_idx = st.session_state.page_number * rows_per_page
430
  end_idx = min(start_idx + rows_per_page, total_rows)
431
 
432
- # Display the paginated dataframe with increased width
433
  st.dataframe(
434
  filtered_df.iloc[start_idx:end_idx],
435
  hide_index=True,
436
- use_container_width=True, # This makes the table use full width
437
  column_config={
 
 
 
 
 
 
 
 
 
 
 
 
438
  "Probability": st.column_config.ProgressColumn(
439
  "Probability",
440
  format="%.2f",
@@ -443,7 +459,7 @@ if st.session_state.submitted:
443
  ),
444
  "Protein": st.column_config.TextColumn(
445
  "Protein",
446
- help="UniProt ID",
447
  ),
448
  "GO_category": st.column_config.TextColumn(
449
  "GO Category",
@@ -451,15 +467,14 @@ if st.session_state.submitted:
451
  ),
452
  "GO_term": st.column_config.TextColumn(
453
  "GO Term",
454
- help="Gene Ontology Term ID",
455
  ),
456
  }
457
  )
458
-
459
  # Pagination controls with better layout
460
  col1, col2, col3 = st.columns([1, 3, 1])
461
  with col1:
462
- if st.button("⬅️ Previous", disabled=st.session_state.page_number == 0):
463
  st.session_state.page_number -= 1
464
  st.rerun()
465
 
@@ -472,7 +487,7 @@ if st.session_state.submitted:
472
  """, unsafe_allow_html=True)
473
 
474
  with col3:
475
- if st.button("Next ➡️", disabled=st.session_state.page_number >= total_pages - 1):
476
  st.session_state.page_number += 1
477
  st.rerun()
478
 
 
331
  col1, col2, col3, col4 = st.columns(4)
332
 
333
  with col1:
334
+ # Extract UniProt IDs from URLs for the selectbox
335
+ uniprot_ids = st.session_state.predictions_df['UniProt_ID'].apply(
336
+ lambda x: x.split('/')[-2] # Gets the ID part from the URL
337
+ ).unique().tolist()
338
+
339
  # Protein filter
340
  selected_protein = st.selectbox(
341
  "Filter by Protein",
342
+ options=['All'] + sorted(uniprot_ids)
343
  )
344
+
345
  with col2:
346
  # GO category filter
347
  selected_category = st.selectbox(
 
379
  filtered_df = st.session_state.predictions_df.copy()
380
 
381
  if selected_protein != 'All':
382
+ filtered_df = filtered_df[filtered_df['UniProt_ID'].str.contains(selected_protein)]
383
+
384
  if selected_category != 'All':
385
  filtered_df = filtered_df[filtered_df['GO_category'] == selected_category]
386
 
387
  if go_term_filter:
388
+ filtered_df = filtered_df[filtered_df['GO_ID'].str.contains(go_term_filter, case=False, na=False)]
389
 
390
  filtered_df = filtered_df[(filtered_df['Probability'] >= min_probability_threshold) &
391
  (filtered_df['Probability'] <= max_probability_threshold)]
 
434
  start_idx = st.session_state.page_number * rows_per_page
435
  end_idx = min(start_idx + rows_per_page, total_rows)
436
 
 
437
  st.dataframe(
438
  filtered_df.iloc[start_idx:end_idx],
439
  hide_index=True,
440
+ use_container_width=True,
441
  column_config={
442
+ "UniProt_ID": st.column_config.LinkColumn(
443
+ "UniProt ID",
444
+ help="Click to view protein in UniProt",
445
+ validate="^https://www\\.uniprot\\.org/uniprotkb/[A-Z0-9]+/entry$",
446
+ display_text="^https://www\\.uniprot\\.org/uniprotkb/([A-Z0-9]+)/entry$"
447
+ ),
448
+ "GO_ID": st.column_config.LinkColumn(
449
+ "GO ID",
450
+ help="Click to view GO term in QuickGO",
451
+ validate="^https://www\\.ebi\\.ac\\.uk/QuickGO/term/GO:[0-9]+$",
452
+ display_text="^https://www\\.ebi\\.ac\\.uk/QuickGO/term/(GO:[0-9]+)$"
453
+ ),
454
  "Probability": st.column_config.ProgressColumn(
455
  "Probability",
456
  format="%.2f",
 
459
  ),
460
  "Protein": st.column_config.TextColumn(
461
  "Protein",
462
+ help="Protein Name",
463
  ),
464
  "GO_category": st.column_config.TextColumn(
465
  "GO Category",
 
467
  ),
468
  "GO_term": st.column_config.TextColumn(
469
  "GO Term",
470
+ help="Gene Ontology Term Name",
471
  ),
472
  }
473
  )
 
474
  # Pagination controls with better layout
475
  col1, col2, col3 = st.columns([1, 3, 1])
476
  with col1:
477
+ if st.button("Previous", disabled=st.session_state.page_number == 0):
478
  st.session_state.page_number -= 1
479
  st.rerun()
480
 
 
487
  """, unsafe_allow_html=True)
488
 
489
  with col3:
490
+ if st.button("Next", disabled=st.session_state.page_number >= total_pages - 1):
491
  st.session_state.page_number += 1
492
  st.rerun()
493
 
run_prothgt_app.py CHANGED
@@ -130,9 +130,9 @@ def _create_prediction_df(predictions, heterodata, protein_ids, go_category):
130
 
131
  # Create DataFrame
132
  prediction_df = pd.DataFrame({
133
- 'UniProt_ID': all_proteins,
134
  'Protein': all_protein_names,
135
- 'GO_ID': all_go_terms,
136
  'GO_term': all_go_term_names,
137
  'GO_category': all_categories,
138
  'Probability': all_probabilities
 
130
 
131
  # Create DataFrame
132
  prediction_df = pd.DataFrame({
133
+ 'UniProt_ID': [f"https://www.uniprot.org/uniprotkb/{pid}/entry" for pid in all_proteins],
134
  'Protein': all_protein_names,
135
+ 'GO_ID': [f"https://www.ebi.ac.uk/QuickGO/term/{go_id}" for go_id in all_go_terms],
136
  'GO_term': all_go_term_names,
137
  'GO_category': all_categories,
138
  'Probability': all_probabilities