Spaces:
Running
Running
Erva Ulusoy
committed on
Commit
·
da1c3d0
1
Parent(s):
867722f
added fuzzy search feature
Browse files
- ProtHGT_app.py +13 -18
- requirements.txt +2 -1
ProtHGT_app.py
CHANGED
@@ -1,10 +1,6 @@
|
|
1 |
import os
|
2 |
import streamlit as st
|
3 |
-
import
|
4 |
-
import streamlit.components.v1 as components
|
5 |
-
import pandas as pd
|
6 |
-
|
7 |
-
|
8 |
|
9 |
# with st.spinner("Initializing the environment... This may take up to 10 minutes at the start of each session."):
|
10 |
# # Create a temporary placeholder for the message
|
@@ -92,26 +88,25 @@ with st.sidebar:
|
|
92 |
)
|
93 |
|
94 |
if selection_method == "Search proteins":
|
95 |
-
#
|
96 |
-
st.
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
selected_proteins = st.multiselect(
|
106 |
-
"Select
|
107 |
-
options=
|
108 |
placeholder="Start typing to search...",
|
109 |
max_selections=1000
|
110 |
)
|
111 |
|
112 |
if selected_proteins:
|
113 |
st.write(f"Selected {len(selected_proteins)} proteins")
|
114 |
-
|
115 |
else:
|
116 |
uploaded_file = st.file_uploader(
|
117 |
"Upload a text file with UniProt IDs (one per line, max 1000)*",
|
|
|
1 |
import os
|
2 |
import streamlit as st
|
3 |
+
from rapidfuzz import process
|
|
|
|
|
|
|
|
|
4 |
|
5 |
# with st.spinner("Initializing the environment... This may take up to 10 minutes at the start of each session."):
|
6 |
# # Create a temporary placeholder for the message
|
|
|
88 |
)
|
89 |
|
90 |
if selection_method == "Search proteins":
|
91 |
+
# User enters search term
|
92 |
+
search_query = st.text_input("Start typing a protein ID (at least 3 characters)", "")
|
93 |
+
|
94 |
+
# Apply fuzzy search only if query length is >= 3
|
95 |
+
filtered_proteins = []
|
96 |
+
if len(search_query) >= 3:
|
97 |
+
filtered_proteins = [match[0] for match in process.extract(search_query, available_proteins, limit=50)] # Show top 50 matches
|
98 |
+
|
99 |
+
# Multi-select for filtered results
|
|
|
100 |
selected_proteins = st.multiselect(
|
101 |
+
"Select proteins from search results",
|
102 |
+
options=filtered_proteins,
|
103 |
placeholder="Start typing to search...",
|
104 |
max_selections=1000
|
105 |
)
|
106 |
|
107 |
if selected_proteins:
|
108 |
st.write(f"Selected {len(selected_proteins)} proteins")
|
109 |
+
|
110 |
else:
|
111 |
uploaded_file = st.file_uploader(
|
112 |
"Upload a text file with UniProt IDs (one per line, max 1000)*",
|
requirements.txt
CHANGED
@@ -6,4 +6,5 @@ torch==1.12.1+cpu
|
|
6 |
torch_sparse==0.6.15
|
7 |
torch_scatter==2.1.0
|
8 |
torch_geometric==2.2.0
|
9 |
-
gdown
|
|
|
|
6 |
torch_sparse==0.6.15
|
7 |
torch_scatter==2.1.0
|
8 |
torch_geometric==2.2.0
|
9 |
+
gdown
|
10 |
+
rapidfuzz
|