Eachan Johnson committed on
Commit
597542d
·
1 Parent(s): ad359b2

Initial commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +1 -0
  2. .gradio/certificate.pem +31 -0
  3. README.md +2 -0
  4. app.py +438 -0
  5. cache/cache_csv_default-00953711766d478a_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  6. cache/cache_csv_default-03c2d6a24096cadb_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  7. cache/cache_csv_default-06ebd4abec88f824_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  8. cache/cache_csv_default-08596bdace45a9e0_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  9. cache/cache_csv_default-0ccf5404d587e265_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  10. cache/cache_csv_default-1152648bff9b0619_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  11. cache/cache_csv_default-11d1d03ac37ee54d_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  12. cache/cache_csv_default-13e8ff2cbdbb1601_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  13. cache/cache_csv_default-1b04ae4fda4a32e3_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  14. cache/cache_csv_default-1d3aaac1973def40_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  15. cache/cache_csv_default-1d8109d793352a35_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  16. cache/cache_csv_default-1ecc7a7549fcfdea_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  17. cache/cache_csv_default-1eeddf0790526c7b_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  18. cache/cache_csv_default-215074de73e76f09_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  19. cache/cache_csv_default-242181ae292241ee_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  20. cache/cache_csv_default-263b017a70fce543_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  21. cache/cache_csv_default-2a8ca29769ad0476_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  22. cache/cache_csv_default-2c97b90189817bf7_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  23. cache/cache_csv_default-3030c166054fac30_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  24. cache/cache_csv_default-365a0c686393a911_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  25. cache/cache_csv_default-3671ae337359ab4f_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  26. cache/cache_csv_default-371d32405b5d7577_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  27. cache/cache_csv_default-3b5f5887e0c60283_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  28. cache/cache_csv_default-422903a15970f1e3_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  29. cache/cache_csv_default-449dcf17eba1dc10_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  30. cache/cache_csv_default-4caa284a4ac72c2a_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  31. cache/cache_csv_default-4d6078d90c039063_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  32. cache/cache_csv_default-4e957d94d04326a9_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  33. cache/cache_csv_default-4f6c27099bb53527_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  34. cache/cache_csv_default-502853d933683bdb_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  35. cache/cache_csv_default-53eec1958d34ed11_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  36. cache/cache_csv_default-5a935366194dc6e5_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  37. cache/cache_csv_default-5f1d1406a3bfcf0b_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  38. cache/cache_csv_default-6439ec426976ccb8_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  39. cache/cache_csv_default-6555fb0c7e6de5c2_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  40. cache/cache_csv_default-67de10cf26a832b6_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  41. cache/cache_csv_default-6cbfc46a17993cd2_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  42. cache/cache_csv_default-73e50ee513b905fa_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  43. cache/cache_csv_default-747fafe34f78e023_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  44. cache/cache_csv_default-7615c4b4c8d4b6ea_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  45. cache/cache_csv_default-7636b4ed3c6760bc_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  46. cache/cache_csv_default-768e7e63b5514f07_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  47. cache/cache_csv_default-77a76b0b50997a1b_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  48. cache/cache_csv_default-7a2ef400f8e478b4_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  49. cache/cache_csv_default-7b57b57218a16632_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
  50. cache/cache_csv_default-7f002e03028b33d5_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock +0 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ /*.csv
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
README.md CHANGED
@@ -16,4 +16,6 @@ preload_from_hub:
16
  - scbirlab/thomas-2018-spark-wt
17
  ---
18
 
 
 
19
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
16
  - scbirlab/thomas-2018-spark-wt
17
  ---
18
 
19
+ [![Open in Spaces](https://huggingface.co/datasets/huggingface/badges/resolve/main/open-in-hf-spaces-md-dark.svg)](https://huggingface.co/spaces/scbirlab/mic-predict)
20
+
21
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,438 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Gradio demo for schemist."""
2
+
3
+ from typing import Iterable, List, Optional, Union
4
+ from io import TextIOWrapper
5
+ import os
6
+ os.environ["COMMANDLINE_ARGS"] = "--no-gradio-queue"
7
+
8
+ from carabiner import cast, print_err
9
+ from carabiner.pd import read_table
10
+ from duvida.autoclass import AutoModelBox
11
+ import gradio as gr
12
+ import nemony as nm
13
+ import numpy as np
14
+ import pandas as pd
15
+ from rdkit.Chem import Draw, Mol
16
+ from schemist.converting import (
17
+ _TO_FUNCTIONS,
18
+ _FROM_FUNCTIONS,
19
+ convert_string_representation,
20
+ _x2mol,
21
+ )
22
+ from schemist.tables import converter
23
+ import torch
24
+
25
# Markdown file rendered at the top of the app.
HEADER_FILE = os.path.join("sources", "header.md")

# Species display name -> location of the pretrained model for that species.
MODEL_REPOS = {
    "Klebsiella pneumoniae": "hf://scbirlab/spark-dv-fp-2503-kpn",
}

# One loaded model per species, created once at import time (cached in ./cache).
MODELBOXES = dict(
    (species, AutoModelBox.from_pretrained(repo, cache_dir="./cache"))
    for species, repo in MODEL_REPOS.items()
)


def _log10_variance(modelbox, candidates):
    """Prediction variance of `candidates`, log10-transformed."""
    key = modelbox._variance_key
    variance = modelbox.prediction_variance(candidates=candidates)
    return variance.map(lambda row: {key: torch.log10(row[key])})


def _tanimoto_nn(modelbox, candidates):
    """Tanimoto nearest-neighbor metric as returned by the model box (untransformed)."""
    return modelbox.tanimoto_nn(candidates=candidates)


def _log10_doubtscore(modelbox, candidates):
    """Doubtscore of `candidates`, log10-transformed."""
    scores = modelbox.doubtscore(candidates=candidates)
    return scores.map(lambda row: {"doubtscore": torch.log10(row["doubtscore"])})


def _log10_information_sensitivity(modelbox, candidates):
    """Approximate information sensitivity of `candidates`, log10-transformed."""
    sensitivity = modelbox.information_sensitivity(
        candidates=candidates,
        optimality_approximation=True,
        approximator="squared_jacobian",
    )
    return sensitivity.map(
        lambda row: {"information sensitivity": torch.log10(row["information sensitivity"])}
    )


# UI label -> callable(modelbox, candidates) computing that metric.
# Insertion order matters: the UI pre-selects the first two entries.
EXTRA_METRICS = {
    "log10(variance)": _log10_variance,
    "Tanimoto nearest neighbor to training data": _tanimoto_nn,
    "Doubtscore": _log10_doubtscore,
    "Information sensitivity (approx.)": _log10_information_sensitivity,
}
41
+
42
def load_input_data(file: TextIOWrapper) -> tuple:
    """Read an uploaded table and build the widgets that present it.

    Args:
        file: Uploaded file object; only its ``.name`` attribute (a path) is used.

    Returns:
        A pair ``(table, column_picker)``: a visible ``gr.Dataframe`` holding the
        parsed table, and an interactive ``gr.Dropdown`` whose choices are the
        table's non-numeric columns.
    """
    table = read_table(file.name)
    # Offer only non-numeric columns as candidates for the input column.
    string_cols = list(table.select_dtypes(exclude=[np.number]))
    return (
        gr.Dataframe(value=table, visible=True),
        gr.Dropdown(choices=string_cols, interactive=True),
    )
47
+
48
+
49
+ def _clean_split_input(strings: str) -> List[str]:
50
+ return [s2.strip() for s in strings.split("\n") for s2 in s.split(",")]
51
+
52
+
53
def _convert_input(
    strings: str,
    input_representation: str = 'smiles',
    output_representation: Union[Iterable[str], str] = 'smiles'
) -> dict:
    """Convert pasted chemical strings into one or more other representations.

    Args:
        strings: Raw pasted text; split into entries by `_clean_split_input`.
        input_representation: Format of the pasted entries.
        output_representation: Target format, or several target formats.

    Returns:
        A dict with the same keys as the result of
        `convert_string_representation`; each value is a list of `str`.
    """
    entries = _clean_split_input(strings)
    converted = convert_string_representation(
        strings=entries,
        input_representation=input_representation,
        output_representation=output_representation,
    )
    # Normalise every value to a list of strings (a single result may be a scalar).
    return {key: list(map(str, cast(val, to=list))) for key, val in converted.items()}
65
+
66
+
67
def convert_one(
    strings: str,
    input_representation: str = 'smiles',
    output_representation: Union[Iterable[str], str] = 'smiles'
):
    """Convert pasted chemical strings by routing them through `convert_file`.

    The pasted entries become a single-column table whose column is named after
    `input_representation`; that table is then converted like an uploaded file.

    Args:
        strings: Raw pasted text; split into entries by `_clean_split_input`.
        input_representation: Format of the pasted entries.
        output_representation: Target format, or several target formats.

    Returns:
        Whatever `convert_file` returns for the single-column table.
    """
    entries = _clean_split_input(strings)
    single_column_table = pd.DataFrame({input_representation: entries})
    return convert_file(
        single_column_table,
        column=input_representation,
        input_representation=input_representation,
        output_representation=output_representation,
    )
83
+
84
+
85
def predict_one(
    strings: str,
    input_representation: str = 'smiles',
    predict: Optional[Union[Iterable[str], str]] = None,
    extra_metrics: Optional[Union[Iterable[str], str]] = None
):
    """Predict MIC (and optional extra metrics) for pasted chemical strings.

    Args:
        strings: Raw pasted text, one molecule per line (or comma-separated).
        input_representation: Format of the pasted entries.
        predict: Species name(s) to predict for; each must be a key of
            `MODELBOXES`. ``None`` (the default) predicts for every loaded
            species. The previous default, ``'smiles'``, is not a species and
            always raised ``KeyError``.
        extra_metrics: Name(s) of entries in `EXTRA_METRICS` to compute as
            well, or ``None`` for no extra metrics.

    Returns:
        A visible ``gr.DataFrame`` with columns ``id``, the prediction and
        metric columns (in the order computed), then ``smiles``, ``inchikey``,
        ``mwt`` and ``clogp``.

    Raises:
        KeyError: If a requested species or extra metric is unknown.
    """
    extra_metrics = [] if extra_metrics is None else cast(extra_metrics, to=list)
    species_to_predict = list(MODELBOXES) if predict is None else cast(predict, to=list)

    descriptor_cols = ['id', 'smiles', 'inchikey', "mwt", "clogp"]
    prediction_df = convert_one(
        strings=strings,
        input_representation=input_representation,
        output_representation=descriptor_cols,
    )

    prediction_cols = []
    for species in species_to_predict:
        message = f"Predicting for species: {species}"
        print_err(message)
        gr.Info(message, duration=3)

        this_modelbox = MODELBOXES[species]
        this_features = this_modelbox._input_cols
        this_labels = this_modelbox._label_cols
        # Give the model the column names it expects: SMILES under its first
        # feature name, plus placeholder (NaN) label columns.
        this_prediction_input = (
            prediction_df
            .rename(columns={"smiles": this_features[0]})
            .assign(**{label: np.nan for label in this_labels})
        )
        prediction = this_modelbox.predict(
            data=this_prediction_input,
            features=this_features,
            labels=this_labels,
            aggregator="mean",
            cache="./cache"
        ).with_format("numpy")["__prediction__"].flatten()

        this_col = f"{species}: predicted MIC (µM)"
        # NOTE(review): presumably the model outputs -log10(MIC in molar);
        # 10**(-p) * 1e6 then gives micromolar -- confirm against training data.
        prediction_df[this_col] = np.power(10., -prediction) * 1e6
        prediction_cols.append(this_col)

        for extra_metric in extra_metrics:
            this_col = f"{species}: {extra_metric}"
            prediction_cols.append(this_col)
            this_extra = (
                EXTRA_METRICS[extra_metric](
                    this_modelbox,
                    this_prediction_input,
                )
                .with_format("numpy")
            )
            # The metric is read from the last column of the returned dataset.
            prediction_df[this_col] = this_extra[this_extra.column_names[-1]]

    return gr.DataFrame(
        prediction_df[descriptor_cols[:1] + prediction_cols + descriptor_cols[1:]],
        visible=True
    )
149
+
150
+
151
def convert_file(
    df: pd.DataFrame,
    column: str = 'smiles',
    input_representation: str = 'smiles',
    output_representation: Union[str, Iterable[str]] = 'smiles'
):
    """Convert one column of a table into other chemical representations.

    Progress and a summary (row count and total error count) are reported both
    on stderr and as Gradio info toasts.

    Args:
        df: Table containing the chemical strings.
        column: Name of the column in `df` holding the input strings.
        input_representation: Format of the strings in `column`.
        output_representation: Target format, or several target formats.

    Returns:
        The converted table, with the output-representation columns first and
        all remaining columns after them in their existing order.
    """
    message = f"Converting from {input_representation} to {output_representation}..."
    print_err(message)
    gr.Info(message, duration=3)
    errors, df = converter(
        df=df,
        column=column,
        input_representation=input_representation,
        output_representation=output_representation,
    )
    # Compare column names against a list: testing `col not in "<str>"` would
    # be a substring match when a single representation is given as a string.
    output_cols = cast(output_representation, to=list)
    df = df[output_cols + [col for col in df if col not in output_cols]]
    all_err = sum(errors.values())
    message = (
        f"Converted {df.shape[0]} molecules from "
        f"{input_representation} to {output_representation} "
        f"with {all_err} errors!"
    )
    print_err(message)
    gr.Info(message, duration=5)
    return df
179
+
180
+
181
def predict_file(
    df: pd.DataFrame,
    column: str = 'smiles',
    input_representation: str = 'smiles',
    extra_metrics: Optional[Union[Iterable[str], str]] = None,
    predict: Optional[Union[Iterable[str], str]] = None
):
    """Predict MIC (and optional extra metrics) for every row of a table.

    Previously this function read an undefined name, ``predict``, and so always
    raised ``NameError``. It is now a trailing keyword parameter, which keeps
    the existing positional order intact.

    Args:
        df: Table containing the chemical strings.
        column: Name of the column in `df` holding the input strings.
        input_representation: Format of the strings in `column`.
        extra_metrics: Name(s) of entries in `EXTRA_METRICS` to compute as
            well, or ``None`` for no extra metrics.
        predict: Species name(s) to predict for; each must be a key of
            `MODELBOXES`. ``None`` predicts for every loaded species.

    Returns:
        A ``pd.DataFrame`` with columns ``id``, the prediction and metric
        columns (in the order computed), then ``smiles``, ``inchikey``,
        ``mwt`` and ``clogp``.

    Raises:
        KeyError: If a requested species or extra metric is unknown.
    """
    extra_metrics = [] if extra_metrics is None else cast(extra_metrics, to=list)
    species_to_predict = list(MODELBOXES) if predict is None else cast(predict, to=list)

    descriptor_cols = ["id", "smiles", "inchikey", "mwt", "clogp"]
    prediction_df = convert_file(
        df,
        column=column,
        input_representation=input_representation,
        output_representation=descriptor_cols,
    )

    prediction_cols = []
    for species in species_to_predict:
        this_modelbox = MODELBOXES[species]
        this_features = this_modelbox._input_cols
        this_labels = this_modelbox._label_cols
        # Give the model the column names it expects: SMILES under its first
        # feature name, plus placeholder (NaN) label columns.
        this_prediction_input = (
            prediction_df
            .rename(columns={"smiles": this_features[0]})
            .assign(**{label: np.nan for label in this_labels})
        )
        # Same call as `predict_one` (including the "mean" aggregator) so both
        # entry points produce the same numbers for the same molecules.
        prediction = this_modelbox.predict(
            data=this_prediction_input,
            features=this_features,
            labels=this_labels,
            aggregator="mean",
            cache="./cache"
        ).with_format("numpy")["__prediction__"].flatten()

        this_col = f"{species}: predicted MIC (µM)"
        # NOTE(review): presumably the model outputs -log10(MIC in molar);
        # 10**(-p) * 1e6 then gives micromolar -- confirm against training data.
        prediction_df[this_col] = np.power(10., -prediction) * 1e6
        prediction_cols.append(this_col)

        for extra_metric in extra_metrics:
            this_col = f"{species}: {extra_metric}"
            prediction_cols.append(this_col)
            this_extra = (
                EXTRA_METRICS[extra_metric](
                    this_modelbox,
                    this_prediction_input,
                )
                .with_format("numpy")
            )
            # Read the metric from the last column, as `predict_one` does.
            # NOTE(review): this function used the first column before -- confirm
            # which column of the returned dataset actually holds the metric.
            prediction_df[this_col] = this_extra[this_extra.column_names[-1]]

    return prediction_df[descriptor_cols[:1] + prediction_cols + descriptor_cols[1:]]
236
+
237
def draw_one(
    strings: Union[Iterable[str], str],
    input_representation: str = 'smiles'
):
    """Draw the pasted molecules as a grid image with identifier legends.

    Args:
        strings: Raw pasted text; split into entries by `_clean_split_input`.
        input_representation: Format of the pasted entries.

    Returns:
        The image produced by ``Draw.MolsToGridImage`` (at most three molecules
        per row, 450x450 px each), or ``None`` when there is nothing to draw.
    """
    _ids = _convert_input(
        strings,
        input_representation,
        ["inchikey", "id", "pubchem_name"],
    )
    mols = cast(_x2mol(_clean_split_input(strings), input_representation), to=list)
    if isinstance(mols, Mol):
        mols = [mols]
    if len(mols) == 0:
        # molsPerRow would be 0 below, which the grid layout cannot handle.
        return None
    # One legend per molecule: its identifiers stacked on separate lines.
    legends = ["\n".join(items) for items in zip(*_ids.values())]
    return Draw.MolsToGridImage(
        mols,
        molsPerRow=min(3, len(mols)),
        subImgSize=(450, 450),
        legends=legends,
    )
255
+
256
+
257
def download_table(
    df: pd.DataFrame
) -> "gr.DownloadButton":
    """Write a table to CSV and return a visible download button for it.

    The file is written to the current working directory as
    ``converted-<hash>.csv``, where the hash is derived from the table's
    row hashes.

    Args:
        df: Table to export.

    Returns:
        A visible ``gr.DownloadButton`` whose value is the CSV filename.
    """
    row_hashes = pd.util.hash_pandas_object(df).values
    df_hash = nm.hash(row_hashes)
    filename = f"converted-{df_hash}.csv"
    df.to_csv(filename, index=False)
    return gr.DownloadButton(value=filename, visible=True)
264
+
265
with gr.Blocks() as demo:

    # Page header, read from the Markdown file at HEADER_FILE.
    with open(HEADER_FILE, 'r') as f:
        header_md = f.read()
    gr.Markdown(header_md)

    # ---- Tab 1: paste molecules, get predictions and drawings ----
    with gr.Tab(label="Paste one per line"):
        input_format_single = gr.Dropdown(
            label="Input string format",
            choices=list(_FROM_FUNCTIONS),
            value="smiles",
            interactive=True,
        )
        input_line = gr.Textbox(
            label="Input",
            placeholder="Paste your molecule here, one per line",
            lines=2,
            interactive=True,
            submit_btn=True,
        )
        output_species_single = gr.CheckboxGroup(
            label="Species for prediction",
            choices=list(MODEL_REPOS),
            value=list(MODEL_REPOS)[:1],
            interactive=True,
        )
        extra_metric = gr.CheckboxGroup(
            label="Extra metrics (can increase calculation time!)",
            choices=list(EXTRA_METRICS),
            value=list(EXTRA_METRICS)[:2],
            interactive=True,
        )
        examples = gr.Examples(
            examples=[
                [
                    '\n'.join([
                        "C1CC1N2C=C(C(=O)C3=CC(=C(C=C32)N4CCNCC4)F)C(=O)O",
                        # The backslash is part of the SMILES; it is escaped so
                        # Python does not treat "\O" as an (invalid) escape.
                        "CN1C(=NC(=O)C(=O)N1)SCC2=C(N3[C@@H]([C@@H](C3=O)NC(=O)/C(=N\\OC)/C4=CSC(=N4)N)SC2)C(=O)O",
                        "CC(=O)NC[C@H]1CN(C(=O)O1)C2=CC(=C(C=C2)N3CCOCC3)F",
                        "C1CC2=CC(=NC=C2OC1)CNC3CCN(CC3)C[C@@H]4CN5C(=O)C=CC6=C5N4C(=O)C=N6",
                    ]),
                    list(MODEL_REPOS)[0],
                    list(EXTRA_METRICS)[:2],
                ],  # cipro, ceftriaxone, linezolid, gepotidacin
                [
                    '\n'.join([
                        "C[C@H]1[C@H]([C@H](C[C@@H](O1)O[C@H]2C[C@@](CC3=C2C(=C4C(=C3O)C(=O)C5=C(C4=O)C(=CC=C5)OC)O)(C(=O)CO)O)N)O",
                        "CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)[C@@H](C3=CC=CC=C3)N)C(=O)O)C",
                        "CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)[C@@H](C3=CC=C(C=C3)O)N)C(=O)O)C",
                    ]),
                    list(MODEL_REPOS)[0],
                    list(EXTRA_METRICS)[:2],
                ],  # doxorubicin, ampicillin, amoxicillin
                [
                    '\n'.join([
                        "C1=C(SC(=N1)SC2=NN=C(S2)N)[N+](=O)[O-]",
                        "C1CN(CCC12C3=CC=CC=C3NC(=O)O2)CCC4=CC=C(C=C4)C(F)(F)F",
                        "COC1=CC(=CC(=C1OC)OC)CC2=CN=C(N=C2N)N",
                        "CC1=CC(=NO1)NS(=O)(=O)C2=CC=C(C=C2)N",
                        # Two molecules in one literal, separated by "\n".
                        "C1[C@@H]([C@H]([C@@H]([C@H]([C@@H]1NC(=O)[C@H](CCN)O)O[C@@H]2[C@@H]([C@H]([C@@H]([C@H](O2)CO)O)N)O)O)O[C@@H]3[C@@H]([C@H]([C@@H]([C@H](O3)CN)O)O)O)N\nC1=CN=CC=C1C(=O)NN",
                    ]),
                    list(MODEL_REPOS)[0],
                    list(EXTRA_METRICS)[:2],
                ],  # Halicin, Abaucin, Trimethoprim, Sulfamethoxazole, Amikacin, Isoniazid
            ],
            example_labels=[
                "Ciprofloxacin, Ceftriaxone, Linezolid, Gepotidacin",
                "Doxorubicin, Ampicillin, Amoxicillin",
                "Halicin, Abaucin, Trimethoprim, Sulfamethoxazole, Amikacin, Isoniazid"
            ],
            inputs=[input_line, output_species_single, extra_metric],
            cache_mode="eager",
        )
        download_single = gr.DownloadButton(
            label="Download predictions",
            visible=False,
        )
        with gr.Row():
            output_line = gr.DataFrame(
                label="Predictions",
                interactive=False,
                visible=False,
            )
            drawing = gr.Image(label="Chemical structures")
        # On submit: predict, then draw the structures, then offer a download.
        gr.on(
            [
                input_line.submit,
            ],
            fn=predict_one,
            inputs=[
                input_line,
                input_format_single,
                output_species_single,
                extra_metric,
            ],
            outputs={
                output_line,
            }
        ).then(
            draw_one,
            inputs=[
                input_line,
                input_format_single,
            ],
            outputs=drawing,
        ).then(
            download_table,
            inputs=output_line,
            outputs=download_single
        )

    # ---- Tab 2: upload a table of molecules ----
    with gr.Tab("Convert a file"):
        input_file = gr.File(
            label="Upload a table of chemical compounds here",
            file_types=[".xlsx", ".csv", ".tsv", ".txt"],
        )
        with gr.Row():
            input_column = gr.Dropdown(
                label="Input column name",
                choices=[],
            )
            input_format = gr.Dropdown(
                label="Input string format",
                choices=list(_FROM_FUNCTIONS),
                value="smiles",
                interactive=True,
            )
            output_species = gr.CheckboxGroup(
                label="Species for prediction",
                choices=list(MODEL_REPOS),
                value=list(MODEL_REPOS)[:1],
                interactive=True,
            )
            go_button2 = gr.Button(
                value="Predict!",
            )

        download = gr.DownloadButton(
            label="Download converted data",
            visible=False,
        )
        input_data = gr.Dataframe(
            label="Input data",
            max_height=100,
            visible=False,
            interactive=False,
        )

        # Show the uploaded table and populate the column picker.
        input_file.upload(
            load_input_data,
            inputs=[input_file],
            outputs=[input_data, input_column]
        )
        # NOTE(review): the fourth input (selected species) arrives in
        # `convert_file` as `output_representation`. Given the "Predict!"
        # button, `predict_file` looks like the intended handler -- confirm
        # and rewire together with that function's signature.
        go_button2.click(
            convert_file,
            inputs=[
                input_data,
                input_column,
                input_format,
                output_species,
            ],
            outputs={
                input_data,
            }
        ).then(
            download_table,
            inputs=input_data,
            outputs=download
        )
435
if __name__ == "__main__":
    # Enable the request queue, then serve the app with a public share link.
    demo.queue().launch(share=True)
438
+
cache/cache_csv_default-00953711766d478a_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-03c2d6a24096cadb_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-06ebd4abec88f824_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-08596bdace45a9e0_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-0ccf5404d587e265_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-1152648bff9b0619_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-11d1d03ac37ee54d_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-13e8ff2cbdbb1601_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-1b04ae4fda4a32e3_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-1d3aaac1973def40_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-1d8109d793352a35_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-1ecc7a7549fcfdea_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-1eeddf0790526c7b_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-215074de73e76f09_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-242181ae292241ee_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-263b017a70fce543_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-2a8ca29769ad0476_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-2c97b90189817bf7_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-3030c166054fac30_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-365a0c686393a911_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-3671ae337359ab4f_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-371d32405b5d7577_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-3b5f5887e0c60283_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-422903a15970f1e3_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-449dcf17eba1dc10_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-4caa284a4ac72c2a_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-4d6078d90c039063_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-4e957d94d04326a9_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-4f6c27099bb53527_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-502853d933683bdb_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-53eec1958d34ed11_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-5a935366194dc6e5_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-5f1d1406a3bfcf0b_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-6439ec426976ccb8_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-6555fb0c7e6de5c2_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-67de10cf26a832b6_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-6cbfc46a17993cd2_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-73e50ee513b905fa_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-747fafe34f78e023_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-7615c4b4c8d4b6ea_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-7636b4ed3c6760bc_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-768e7e63b5514f07_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-77a76b0b50997a1b_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-7a2ef400f8e478b4_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-7b57b57218a16632_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes
cache/cache_csv_default-7f002e03028b33d5_0.0.0_a43390c7ecea6519ff2ce9d10005c8750601c9e456069be5efbd2747df45f420.lock ADDED
File without changes