Erva Ulusoy committed on
Commit
da1c3d0
·
1 Parent(s): 867722f

added fuzzy search feature

Browse files
Files changed (2) hide show
  1. ProtHGT_app.py +13 -18
  2. requirements.txt +2 -1
ProtHGT_app.py CHANGED
@@ -1,10 +1,6 @@
1
  import os
2
  import streamlit as st
3
- import time
4
- import streamlit.components.v1 as components
5
- import pandas as pd
6
-
7
-
8
 
9
  # with st.spinner("Initializing the environment... This may take up to 10 minutes at the start of each session."):
10
  # # Create a temporary placeholder for the message
@@ -92,26 +88,25 @@ with st.sidebar:
92
  )
93
 
94
  if selection_method == "Search proteins":
95
- # Add custom CSS to make multiselect scrollable
96
- st.markdown("""
97
- <style>
98
- [data-testid="stMultiSelect"] div:nth-child(2) {
99
- max-height: 200px;
100
- overflow-y: auto;
101
- }
102
- </style>
103
- """, unsafe_allow_html=True)
104
-
105
  selected_proteins = st.multiselect(
106
- "Select or search for proteins (UniProt IDs)",
107
- options=available_proteins,
108
  placeholder="Start typing to search...",
109
  max_selections=1000
110
  )
111
 
112
  if selected_proteins:
113
  st.write(f"Selected {len(selected_proteins)} proteins")
114
-
115
  else:
116
  uploaded_file = st.file_uploader(
117
  "Upload a text file with UniProt IDs (one per line, max 1000)*",
 
1
  import os
2
  import streamlit as st
3
+ from rapidfuzz import process
 
 
 
 
4
 
5
  # with st.spinner("Initializing the environment... This may take up to 10 minutes at the start of each session."):
6
  # # Create a temporary placeholder for the message
 
88
  )
89
 
90
  if selection_method == "Search proteins":
91
+ # User enters search term
92
+ search_query = st.text_input("Start typing a protein ID (at least 3 characters)", "")
93
+
94
+ # Apply fuzzy search only if query length is >= 3
95
+ filtered_proteins = []
96
+ if len(search_query) >= 3:
97
+ filtered_proteins = [match[0] for match in process.extract(search_query, available_proteins, limit=50)] # Show top 50 matches
98
+
99
+ # Multi-select for filtered results
 
100
  selected_proteins = st.multiselect(
101
+ "Select proteins from search results",
102
+ options=filtered_proteins,
103
  placeholder="Start typing to search...",
104
  max_selections=1000
105
  )
106
 
107
  if selected_proteins:
108
  st.write(f"Selected {len(selected_proteins)} proteins")
109
+
110
  else:
111
  uploaded_file = st.file_uploader(
112
  "Upload a text file with UniProt IDs (one per line, max 1000)*",
requirements.txt CHANGED
@@ -6,4 +6,5 @@ torch==1.12.1+cpu
6
  torch_sparse==0.6.15
7
  torch_scatter==2.1.0
8
  torch_geometric==2.2.0
9
- gdown
 
 
6
  torch_sparse==0.6.15
7
  torch_scatter==2.1.0
8
  torch_geometric==2.2.0
9
+ gdown
10
+ rapidfuzz