Spaces:
Running
Running
GSK-2547-get-rid-of-pipeline (#51)
Browse files- remove pipeline and improve events trigger (461883adf15e41590810f0a6b15b21cb9ec07ffd)
Co-authored-by: zcy <[email protected]>
- app_text_classification.py +14 -53
- text_classification.py +63 -13
- text_classification_ui_helpers.py +67 -81
- wordings.py +20 -1
app_text_classification.py
CHANGED
|
@@ -8,11 +8,10 @@ from text_classification_ui_helpers import (
|
|
| 8 |
align_columns_and_show_prediction,
|
| 9 |
check_dataset,
|
| 10 |
precheck_model_ds_enable_example_btn,
|
| 11 |
-
select_run_mode,
|
| 12 |
try_submit,
|
| 13 |
write_column_mapping_to_config,
|
| 14 |
)
|
| 15 |
-
from wordings import CONFIRM_MAPPING_DETAILS_MD, INTRODUCTION_MD
|
| 16 |
|
| 17 |
MAX_LABELS = 40
|
| 18 |
MAX_FEATURES = 20
|
|
@@ -80,30 +79,9 @@ def get_demo():
|
|
| 80 |
column_mappings.append(gr.Dropdown(visible=False))
|
| 81 |
|
| 82 |
with gr.Accordion(label="Model Wrap Advance Config", open=True):
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
)
|
| 87 |
-
gr.HTML(
|
| 88 |
-
value="""
|
| 89 |
-
We recommend to use
|
| 90 |
-
<a href="https://huggingface.co/docs/api-inference/detailed_parameters#text-classification-task">
|
| 91 |
-
Hugging Face Inference API
|
| 92 |
-
</a>
|
| 93 |
-
for the evaluation,
|
| 94 |
-
which requires your <a href="https://huggingface.co/settings/tokens">HF token</a>.
|
| 95 |
-
<br/>
|
| 96 |
-
Otherwise, an
|
| 97 |
-
<a href="https://huggingface.co/docs/transformers/main_classes/pipelines#transformers.TextClassificationPipeline">
|
| 98 |
-
HF pipeline
|
| 99 |
-
</a>
|
| 100 |
-
will be created and run in this Space. It takes more time to get the result.
|
| 101 |
-
<br/>
|
| 102 |
-
<b>
|
| 103 |
-
Do not worry, your HF token is only used in this Space for your evaluation.
|
| 104 |
-
</b>
|
| 105 |
-
""",
|
| 106 |
-
)
|
| 107 |
inference_token = gr.Textbox(
|
| 108 |
placeholder="hf-xxxxxxxxxxxxxxxxxxxx",
|
| 109 |
value="",
|
|
@@ -112,7 +90,6 @@ def get_demo():
|
|
| 112 |
interactive=True,
|
| 113 |
)
|
| 114 |
|
| 115 |
-
|
| 116 |
with gr.Accordion(label="Scanner Advance Config (optional)", open=False):
|
| 117 |
scanners = gr.CheckboxGroup(label="Scan Settings", visible=True)
|
| 118 |
|
|
@@ -143,37 +120,21 @@ def get_demo():
|
|
| 143 |
every=0.5,
|
| 144 |
)
|
| 145 |
|
| 146 |
-
|
| 147 |
-
check_dataset,
|
| 148 |
-
inputs=[dataset_id_input],
|
| 149 |
-
outputs=[dataset_config_input, dataset_split_input, first_line_ds, loading_status],
|
| 150 |
-
)
|
| 151 |
-
|
| 152 |
-
dataset_config_input.change(
|
| 153 |
-
check_dataset,
|
| 154 |
-
inputs=[dataset_id_input, dataset_config_input],
|
| 155 |
-
outputs=[dataset_config_input, dataset_split_input, first_line_ds, loading_status],
|
| 156 |
-
)
|
| 157 |
-
|
| 158 |
-
dataset_split_input.change(
|
| 159 |
-
check_dataset,
|
| 160 |
-
inputs=[dataset_id_input, dataset_config_input, dataset_split_input],
|
| 161 |
-
outputs=[dataset_config_input, dataset_split_input, first_line_ds, loading_status],
|
| 162 |
-
)
|
| 163 |
-
|
| 164 |
scanners.change(write_scanners, inputs=[scanners, uid_label])
|
| 165 |
|
| 166 |
-
run_inference.change(
|
| 167 |
-
select_run_mode,
|
| 168 |
-
inputs=[run_inference],
|
| 169 |
-
outputs=[inference_token],
|
| 170 |
-
)
|
| 171 |
-
|
| 172 |
gr.on(
|
| 173 |
triggers=[model_id_input.change],
|
| 174 |
fn=get_related_datasets_from_leaderboard,
|
| 175 |
inputs=[model_id_input],
|
| 176 |
outputs=[dataset_id_input],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
)
|
| 178 |
|
| 179 |
gr.on(
|
|
@@ -209,7 +170,7 @@ def get_demo():
|
|
| 209 |
dataset_config_input,
|
| 210 |
dataset_split_input,
|
| 211 |
],
|
| 212 |
-
outputs=[example_btn, loading_status],
|
| 213 |
)
|
| 214 |
|
| 215 |
gr.on(
|
|
@@ -254,7 +215,7 @@ def get_demo():
|
|
| 254 |
)
|
| 255 |
|
| 256 |
def enable_run_btn(run_inference, inference_token, model_id, dataset_id, dataset_config, dataset_split):
|
| 257 |
-
if run_inference
|
| 258 |
return gr.update(interactive=False)
|
| 259 |
if model_id == "" or dataset_id == "" or dataset_config == "" or dataset_split == "":
|
| 260 |
return gr.update(interactive=False)
|
|
|
|
| 8 |
align_columns_and_show_prediction,
|
| 9 |
check_dataset,
|
| 10 |
precheck_model_ds_enable_example_btn,
|
|
|
|
| 11 |
try_submit,
|
| 12 |
write_column_mapping_to_config,
|
| 13 |
)
|
| 14 |
+
from wordings import CONFIRM_MAPPING_DETAILS_MD, INTRODUCTION_MD, USE_INFERENCE_API_TIP
|
| 15 |
|
| 16 |
MAX_LABELS = 40
|
| 17 |
MAX_FEATURES = 20
|
|
|
|
| 79 |
column_mappings.append(gr.Dropdown(visible=False))
|
| 80 |
|
| 81 |
with gr.Accordion(label="Model Wrap Advance Config", open=True):
|
| 82 |
+
gr.HTML(USE_INFERENCE_API_TIP)
|
| 83 |
+
|
| 84 |
+
run_inference = gr.Checkbox(value=True, label="Run with Inference API")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
inference_token = gr.Textbox(
|
| 86 |
placeholder="hf-xxxxxxxxxxxxxxxxxxxx",
|
| 87 |
value="",
|
|
|
|
| 90 |
interactive=True,
|
| 91 |
)
|
| 92 |
|
|
|
|
| 93 |
with gr.Accordion(label="Scanner Advance Config (optional)", open=False):
|
| 94 |
scanners = gr.CheckboxGroup(label="Scan Settings", visible=True)
|
| 95 |
|
|
|
|
| 120 |
every=0.5,
|
| 121 |
)
|
| 122 |
|
| 123 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
scanners.change(write_scanners, inputs=[scanners, uid_label])
|
| 125 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
gr.on(
|
| 127 |
triggers=[model_id_input.change],
|
| 128 |
fn=get_related_datasets_from_leaderboard,
|
| 129 |
inputs=[model_id_input],
|
| 130 |
outputs=[dataset_id_input],
|
| 131 |
+
).then(fn=check_dataset, inputs=[dataset_id_input], outputs=[dataset_config_input, dataset_split_input, loading_status])
|
| 132 |
+
|
| 133 |
+
gr.on(
|
| 134 |
+
triggers=[dataset_id_input.input],
|
| 135 |
+
fn=check_dataset,
|
| 136 |
+
inputs=[dataset_id_input],
|
| 137 |
+
outputs=[dataset_config_input, dataset_split_input, loading_status]
|
| 138 |
)
|
| 139 |
|
| 140 |
gr.on(
|
|
|
|
| 170 |
dataset_config_input,
|
| 171 |
dataset_split_input,
|
| 172 |
],
|
| 173 |
+
outputs=[example_btn, first_line_ds, loading_status],
|
| 174 |
)
|
| 175 |
|
| 176 |
gr.on(
|
|
|
|
| 215 |
)
|
| 216 |
|
| 217 |
def enable_run_btn(run_inference, inference_token, model_id, dataset_id, dataset_config, dataset_split):
|
| 218 |
+
if not run_inference or inference_token == "":
|
| 219 |
return gr.update(interactive=False)
|
| 220 |
if model_id == "" or dataset_id == "" or dataset_config == "" or dataset_split == "":
|
| 221 |
return gr.update(interactive=False)
|
text_classification.py
CHANGED
|
@@ -5,15 +5,13 @@ import datasets
|
|
| 5 |
import huggingface_hub
|
| 6 |
import pandas as pd
|
| 7 |
from transformers import pipeline
|
|
|
|
|
|
|
| 8 |
|
|
|
|
| 9 |
|
| 10 |
-
def get_labels_and_features_from_dataset(
|
| 11 |
-
if not dataset_config:
|
| 12 |
-
dataset_config = "default"
|
| 13 |
-
if not split:
|
| 14 |
-
split = "train"
|
| 15 |
try:
|
| 16 |
-
ds = datasets.load_dataset(dataset_id, dataset_config)[split]
|
| 17 |
dataset_features = ds.features
|
| 18 |
label_keys = [i for i in dataset_features.keys() if i.startswith('label')]
|
| 19 |
if len(label_keys) == 0: # no labels found
|
|
@@ -29,12 +27,60 @@ def get_labels_and_features_from_dataset(dataset_id, dataset_config, split):
|
|
| 29 |
return labels, features
|
| 30 |
except Exception as e:
|
| 31 |
logging.warning(
|
| 32 |
-
f"Failed
|
| 33 |
)
|
| 34 |
return None, None
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
try:
|
| 39 |
task = huggingface_hub.model_info(model_id).pipeline_tag
|
| 40 |
except Exception:
|
|
@@ -207,7 +253,7 @@ def check_dataset_features_validity(d_id, config, split):
|
|
| 207 |
return df, dataset_features
|
| 208 |
|
| 209 |
|
| 210 |
-
def get_example_prediction(
|
| 211 |
# get a sample prediction from the model on the dataset
|
| 212 |
prediction_input = None
|
| 213 |
prediction_result = None
|
|
@@ -220,9 +266,13 @@ def get_example_prediction(ppl, dataset_id, dataset_config, dataset_split):
|
|
| 220 |
else:
|
| 221 |
prediction_input = ds[0]["text"]
|
| 222 |
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
prediction_result = {
|
| 227 |
f'{result["label"]}': result["score"] for result in results
|
| 228 |
}
|
|
@@ -298,4 +348,4 @@ def text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, sp
|
|
| 298 |
prediction_result,
|
| 299 |
id2label_df,
|
| 300 |
feature_map_df,
|
| 301 |
-
)
|
|
|
|
| 5 |
import huggingface_hub
|
| 6 |
import pandas as pd
|
| 7 |
from transformers import pipeline
|
| 8 |
+
import requests
|
| 9 |
+
import os
|
| 10 |
|
| 11 |
+
HF_WRITE_TOKEN = "HF_WRITE_TOKEN"
|
| 12 |
|
| 13 |
+
def get_labels_and_features_from_dataset(ds):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
try:
|
|
|
|
| 15 |
dataset_features = ds.features
|
| 16 |
label_keys = [i for i in dataset_features.keys() if i.startswith('label')]
|
| 17 |
if len(label_keys) == 0: # no labels found
|
|
|
|
| 27 |
return labels, features
|
| 28 |
except Exception as e:
|
| 29 |
logging.warning(
|
| 30 |
+
f"Get Labels/Features Failed for dataset: {e}"
|
| 31 |
)
|
| 32 |
return None, None
|
| 33 |
|
| 34 |
+
def check_model_task(model_id):
    """Return the ``pipeline_tag`` reported by the Hub for ``model_id``.

    ``None`` is returned both when the model info has no pipeline tag and
    when the lookup raises for any reason, so callers only need to compare
    the result against the task name they expect.
    """
    try:
        model_details = huggingface_hub.model_info(model_id)
        return model_details.pipeline_tag
    except Exception:
        # Any lookup failure is reported the same way as "no task".
        return None
|
| 43 |
+
|
| 44 |
+
def get_model_labels(model_id, example_input):
    """Query the inference API once and collect the labels it reports.

    The token is read from the environment variable named by
    ``HF_WRITE_TOKEN`` (empty string when unset). ``example_input`` is sent
    as the ``inputs`` field with caching enabled.

    Returns ``None`` when the reply contains ``"error"``; otherwise a list of
    every value stored under a ``"label"`` key anywhere in the reply.
    """
    token = os.environ.get(HF_WRITE_TOKEN, default="")
    api_reply = hf_inference_api(
        model_id,
        token,
        {"inputs": example_input, "options": {"use_cache": True}},
    )
    if "error" not in api_reply:
        return extract_from_response(api_reply, "label")
    return None
|
| 51 |
+
|
| 52 |
+
def extract_from_response(data, key):
    """Collect every non-``None`` value stored under ``key`` in nested data.

    ``data`` is walked depth-first through dicts and lists. For a dict, its
    own ``key`` entry (if present and not ``None``) comes first, followed by
    whatever is found inside each of its values in iteration order. Anything
    that is neither a dict nor a list contributes nothing.

    Returns a flat list of the values found, possibly empty.
    """
    if isinstance(data, dict):
        own_value = data.get(key)
        collected = [] if own_value is None else [own_value]
        children = data.values()
    elif isinstance(data, list):
        collected = []
        children = data
    else:
        return []

    for child in children:
        collected += extract_from_response(child, key)
    return collected
|
| 68 |
+
|
| 69 |
+
def hf_inference_api(model_id, hf_token, payload):
    """POST ``payload`` to the inference endpoint of ``model_id``.

    The base URL comes from the ``HF_INFERENCE_ENDPOINT`` environment
    variable and falls back to ``https://api-inference.huggingface.co``.
    ``hf_token`` is sent as a bearer token.

    Returns the decoded JSON body. When the body is not valid JSON, returns
    ``{"error": <raw response bytes>}`` instead, so callers can test for an
    ``"error"`` entry. A non-200 status is logged but does not raise.
    """
    hf_inference_api_endpoint = os.environ.get(
        "HF_INFERENCE_ENDPOINT", default="https://api-inference.huggingface.co"
    )
    url = f"{hf_inference_api_endpoint}/models/{model_id}"
    headers = {"Authorization": f"Bearer {hf_token}"}
    response = requests.post(url, headers=headers, json=payload)
    if response.status_code != 200:
        # logging.ERROR is the integer level constant and is not callable;
        # logging.error() is the function that emits the record.
        logging.error(f"Request to inference API returns {response.status_code}")
    try:
        return response.json()
    except Exception:
        # Body was not JSON: hand the raw content back under "error".
        return {"error": response.content}
|
| 82 |
+
|
| 83 |
+
def check_model_pipeline(model_id):
|
| 84 |
try:
|
| 85 |
task = huggingface_hub.model_info(model_id).pipeline_tag
|
| 86 |
except Exception:
|
|
|
|
| 253 |
return df, dataset_features
|
| 254 |
|
| 255 |
|
| 256 |
+
def get_example_prediction(model_id, dataset_id, dataset_config, dataset_split):
|
| 257 |
# get a sample prediction from the model on the dataset
|
| 258 |
prediction_input = None
|
| 259 |
prediction_result = None
|
|
|
|
| 266 |
else:
|
| 267 |
prediction_input = ds[0]["text"]
|
| 268 |
|
| 269 |
+
hf_token = os.environ.get(HF_WRITE_TOKEN, default="")
|
| 270 |
+
payload = {"inputs": prediction_input, "options": {"use_cache": True}}
|
| 271 |
+
results = hf_inference_api(model_id, hf_token, payload)
|
| 272 |
+
while isinstance(results, list):
|
| 273 |
+
if isinstance(results[0], dict):
|
| 274 |
+
break
|
| 275 |
+
results = results[0]
|
| 276 |
prediction_result = {
|
| 277 |
f'{result["label"]}': result["score"] for result in results
|
| 278 |
}
|
|
|
|
| 348 |
prediction_result,
|
| 349 |
id2label_df,
|
| 350 |
feature_map_df,
|
| 351 |
+
)
|
text_classification_ui_helpers.py
CHANGED
|
@@ -9,7 +9,6 @@ import leaderboard
|
|
| 9 |
import datasets
|
| 10 |
import gradio as gr
|
| 11 |
import pandas as pd
|
| 12 |
-
from transformers.pipelines import TextClassificationPipeline
|
| 13 |
|
| 14 |
from io_utils import (
|
| 15 |
get_yaml_path,
|
|
@@ -19,7 +18,7 @@ from io_utils import (
|
|
| 19 |
write_log_to_user_file,
|
| 20 |
)
|
| 21 |
from text_classification import (
|
| 22 |
-
|
| 23 |
get_example_prediction,
|
| 24 |
get_labels_and_features_from_dataset,
|
| 25 |
)
|
|
@@ -43,72 +42,55 @@ HF_GSK_HUB_HF_TOKEN = "GSK_HF_TOKEN"
|
|
| 43 |
HF_GSK_HUB_UNLOCK_TOKEN = "GSK_HUB_UNLOCK_TOKEN"
|
| 44 |
|
| 45 |
LEADERBOARD = "giskard-bot/evaluator-leaderboard"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
def get_related_datasets_from_leaderboard(model_id):
|
| 47 |
records = leaderboard.records
|
| 48 |
model_records = records[records["model_id"] == model_id]
|
| 49 |
-
datasets_unique = model_records["dataset_id"].unique()
|
|
|
|
| 50 |
if len(datasets_unique) == 0:
|
| 51 |
all_unique_datasets = list(records["dataset_id"].unique())
|
| 52 |
-
print(type(all_unique_datasets), all_unique_datasets)
|
| 53 |
return gr.update(choices=all_unique_datasets, value="")
|
|
|
|
| 54 |
return gr.update(choices=datasets_unique, value=datasets_unique[0])
|
| 55 |
|
| 56 |
|
| 57 |
logger = logging.getLogger(__file__)
|
| 58 |
|
| 59 |
|
| 60 |
-
def check_dataset(dataset_id
|
| 61 |
-
|
| 62 |
-
splits = ["default"]
|
| 63 |
-
logger.info(f"Loading {dataset_id}, {dataset_config}, {dataset_split}")
|
| 64 |
try:
|
| 65 |
configs = datasets.get_dataset_config_names(dataset_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
splits = list(
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
)
|
| 71 |
-
if dataset_config == None:
|
| 72 |
-
dataset_config = configs[0]
|
| 73 |
-
dataset_split = splits[0]
|
| 74 |
-
elif dataset_split == None:
|
| 75 |
-
dataset_split = splits[0]
|
| 76 |
except Exception as e:
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
|
|
|
|
|
|
| 80 |
)
|
| 81 |
-
if dataset_config == None:
|
| 82 |
-
return (
|
| 83 |
-
gr.Dropdown(configs, value=configs[0], visible=True),
|
| 84 |
-
gr.Dropdown(splits, value=splits[0], visible=True),
|
| 85 |
-
gr.DataFrame(pd.DataFrame(), visible=False),
|
| 86 |
-
"",
|
| 87 |
-
)
|
| 88 |
-
elif dataset_split == None:
|
| 89 |
-
return (
|
| 90 |
-
gr.Dropdown(configs, value=dataset_config, visible=True),
|
| 91 |
-
gr.Dropdown(splits, value=splits[0], visible=True),
|
| 92 |
-
gr.DataFrame(pd.DataFrame(), visible=False),
|
| 93 |
-
"",
|
| 94 |
-
)
|
| 95 |
-
|
| 96 |
-
dataset_dict = datasets.load_dataset(dataset_id, dataset_config)
|
| 97 |
-
dataframe: pd.DataFrame = dataset_dict[dataset_split].to_pandas().head(5)
|
| 98 |
-
return (
|
| 99 |
-
gr.Dropdown(configs, value=dataset_config, visible=True),
|
| 100 |
-
gr.Dropdown(splits, value=dataset_split, visible=True),
|
| 101 |
-
gr.DataFrame(dataframe, visible=True),
|
| 102 |
-
"",
|
| 103 |
-
)
|
| 104 |
|
| 105 |
|
| 106 |
-
def select_run_mode(run_inf):
|
| 107 |
-
if run_inf:
|
| 108 |
-
return gr.update(visible=True)
|
| 109 |
-
else:
|
| 110 |
-
return gr.update(visible=False)
|
| 111 |
-
|
| 112 |
|
| 113 |
def write_column_mapping_to_config(uid, *labels):
|
| 114 |
# TODO: Substitute 'text' with more features for zero-shot
|
|
@@ -144,8 +126,7 @@ def export_mappings(all_mappings, key, subkeys, values):
|
|
| 144 |
return all_mappings
|
| 145 |
|
| 146 |
|
| 147 |
-
def list_labels_and_features_from_dataset(ds_labels, ds_features,
|
| 148 |
-
model_labels = list(model_id2label.values())
|
| 149 |
all_mappings = read_column_mapping(uid)
|
| 150 |
# For flattened raw datasets with no labels
|
| 151 |
# check if there are shared labels between model and dataset
|
|
@@ -163,7 +144,7 @@ def list_labels_and_features_from_dataset(ds_labels, ds_features, model_id2label
|
|
| 163 |
gr.Dropdown(
|
| 164 |
label=f"{label}",
|
| 165 |
choices=model_labels,
|
| 166 |
-
value=
|
| 167 |
interactive=True,
|
| 168 |
visible=True,
|
| 169 |
)
|
|
@@ -195,25 +176,37 @@ def list_labels_and_features_from_dataset(ds_labels, ds_features, model_id2label
|
|
| 195 |
def precheck_model_ds_enable_example_btn(
|
| 196 |
model_id, dataset_id, dataset_config, dataset_split
|
| 197 |
):
|
| 198 |
-
|
| 199 |
-
if
|
| 200 |
gr.Warning("Please check your model.")
|
| 201 |
return gr.update(interactive=False), ""
|
| 202 |
-
ds_labels, ds_features = get_labels_and_features_from_dataset(
|
| 203 |
-
dataset_id, dataset_config, dataset_split
|
| 204 |
-
)
|
| 205 |
-
if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
|
| 206 |
-
gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
|
| 207 |
-
return gr.update(interactive=False), ""
|
| 208 |
|
| 209 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
|
| 211 |
|
| 212 |
def align_columns_and_show_prediction(
|
| 213 |
model_id, dataset_id, dataset_config, dataset_split, uid, run_inference, inference_token
|
| 214 |
):
|
| 215 |
-
|
| 216 |
-
if
|
| 217 |
gr.Warning("Please check your model.")
|
| 218 |
return (
|
| 219 |
gr.update(visible=False),
|
|
@@ -228,20 +221,15 @@ def align_columns_and_show_prediction(
|
|
| 228 |
gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)
|
| 229 |
]
|
| 230 |
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
return (
|
| 234 |
-
gr.update(visible=False),
|
| 235 |
-
gr.update(visible=False),
|
| 236 |
-
gr.update(visible=False, open=False),
|
| 237 |
-
gr.update(interactive=False),
|
| 238 |
-
*dropdown_placement,
|
| 239 |
-
)
|
| 240 |
-
model_id2label = ppl.model.config.id2label
|
| 241 |
-
ds_labels, ds_features = get_labels_and_features_from_dataset(
|
| 242 |
-
dataset_id, dataset_config, dataset_split
|
| 243 |
)
|
| 244 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
# when dataset does not have labels or features
|
| 246 |
if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
|
| 247 |
gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
|
|
@@ -257,14 +245,14 @@ def align_columns_and_show_prediction(
|
|
| 257 |
column_mappings = list_labels_and_features_from_dataset(
|
| 258 |
ds_labels,
|
| 259 |
ds_features,
|
| 260 |
-
|
| 261 |
uid,
|
| 262 |
)
|
| 263 |
|
| 264 |
# when labels or features are not aligned
|
| 265 |
# show manually column mapping
|
| 266 |
if (
|
| 267 |
-
collections.Counter(
|
| 268 |
or ds_features[0] != "text"
|
| 269 |
):
|
| 270 |
return (
|
|
@@ -276,9 +264,6 @@ def align_columns_and_show_prediction(
|
|
| 276 |
*column_mappings,
|
| 277 |
)
|
| 278 |
|
| 279 |
-
prediction_input, prediction_output = get_example_prediction(
|
| 280 |
-
ppl, dataset_id, dataset_config, dataset_split
|
| 281 |
-
)
|
| 282 |
return (
|
| 283 |
gr.update(value=get_styled_input(prediction_input), visible=True),
|
| 284 |
gr.update(value=prediction_output, visible=True),
|
|
@@ -322,10 +307,10 @@ def try_submit(m_id, d_id, config, split, inference, inference_token, uid):
|
|
| 322 |
if os.environ.get("SPACE_ID") == "giskardai/giskard-evaluator":
|
| 323 |
leaderboard_dataset = LEADERBOARD
|
| 324 |
|
| 325 |
-
|
| 326 |
-
if inference and inference_token:
|
| 327 |
inference_type = "hf_inference_api"
|
| 328 |
|
|
|
|
| 329 |
# TODO: Set column mapping for some dataset such as `amazon_polarity`
|
| 330 |
command = [
|
| 331 |
"giskard_scanner",
|
|
@@ -354,6 +339,7 @@ def try_submit(m_id, d_id, config, split, inference, inference_token, uid):
|
|
| 354 |
"--inference_api_token",
|
| 355 |
inference_token,
|
| 356 |
]
|
|
|
|
| 357 |
# The token to publish post
|
| 358 |
if os.environ.get(HF_WRITE_TOKEN):
|
| 359 |
command.append("--hf_token")
|
|
|
|
| 9 |
import datasets
|
| 10 |
import gradio as gr
|
| 11 |
import pandas as pd
|
|
|
|
| 12 |
|
| 13 |
from io_utils import (
|
| 14 |
get_yaml_path,
|
|
|
|
| 18 |
write_log_to_user_file,
|
| 19 |
)
|
| 20 |
from text_classification import (
|
| 21 |
+
check_model_task,
|
| 22 |
get_example_prediction,
|
| 23 |
get_labels_and_features_from_dataset,
|
| 24 |
)
|
|
|
|
| 42 |
HF_GSK_HUB_UNLOCK_TOKEN = "GSK_HUB_UNLOCK_TOKEN"
|
| 43 |
|
| 44 |
LEADERBOARD = "giskard-bot/evaluator-leaderboard"
|
| 45 |
+
|
| 46 |
+
# Module-level placeholders, both initialised to None.
# No `global` statement is needed here: names assigned at module scope are
# already module globals, and `global` only has an effect inside a function.
ds_dict = None
ds_config = None
|
| 49 |
+
|
| 50 |
def get_related_datasets_from_leaderboard(model_id):
    """Build a component update listing datasets recorded for ``model_id``.

    ``leaderboard.records`` is filtered on its ``model_id`` column
    (presumably a pandas DataFrame -- confirm in ``leaderboard``). When the
    model has recorded datasets, they become the choices and the first one is
    preselected. Otherwise every dataset in the records is offered and the
    value is cleared to an empty string.
    """
    all_records = leaderboard.records
    matching = all_records[all_records["model_id"] == model_id]
    model_datasets = list(matching["dataset_id"].unique())

    if model_datasets:
        return gr.update(choices=model_datasets, value=model_datasets[0])

    every_dataset = list(all_records["dataset_id"].unique())
    return gr.update(choices=every_dataset, value="")
|
| 60 |
|
| 61 |
|
| 62 |
logger = logging.getLogger(__file__)
|
| 63 |
|
| 64 |
|
| 65 |
+
def check_dataset(dataset_id):
    """Look up the configs and splits of ``dataset_id``.

    Returns a 3-tuple: an update for the config choices, an update for the
    split choices, and an empty status string. On success the first config
    and the first split of that config are preselected. When the dataset has
    no configs, or anything raises while loading it, both updates are empty
    ``gr.update()`` calls so the components are left as they are.
    """
    logger.info(f"Loading {dataset_id}")
    try:
        configs = datasets.get_dataset_config_names(dataset_id)
        if len(configs) == 0:
            # Nothing to offer: leave both components untouched.
            return (gr.update(), gr.update(), "")
        # Split names are read from the first config only.
        splits = list(datasets.load_dataset(dataset_id, configs[0]).keys())
        return (
            gr.update(choices=configs, value=configs[0], visible=True),
            gr.update(choices=splits, value=splits[0], visible=True),
            "",
        )
    except Exception as e:
        # Logger.warn is a deprecated alias; warning() is the supported name.
        logger.warning(f"Check your dataset {dataset_id}: {e}")
        return (gr.update(), gr.update(), "")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
def write_column_mapping_to_config(uid, *labels):
|
| 96 |
# TODO: Substitute 'text' with more features for zero-shot
|
|
|
|
| 126 |
return all_mappings
|
| 127 |
|
| 128 |
|
| 129 |
+
def list_labels_and_features_from_dataset(ds_labels, ds_features, model_labels, uid):
|
|
|
|
| 130 |
all_mappings = read_column_mapping(uid)
|
| 131 |
# For flattened raw datasets with no labels
|
| 132 |
# check if there are shared labels between model and dataset
|
|
|
|
| 144 |
gr.Dropdown(
|
| 145 |
label=f"{label}",
|
| 146 |
choices=model_labels,
|
| 147 |
+
value=model_labels[i % len(model_labels)],
|
| 148 |
interactive=True,
|
| 149 |
visible=True,
|
| 150 |
)
|
|
|
|
| 176 |
def precheck_model_ds_enable_example_btn(
    model_id, dataset_id, dataset_config, dataset_split
):
    """Validate the model and dataset selection and preview the dataset.

    Always returns a 3-tuple: an update toggling ``interactive`` on the first
    output, an update for the dataset preview (first five rows of the
    selected split), and an empty status string.

    * Model task is not ``"text-classification"``: warn, disable, leave the
      preview untouched.
    * Config or split not chosen yet: change nothing.
    * Labels/features cannot be extracted: warn, disable, still show the
      preview.
    * Loading raises: warn, disable, hide the preview.
    """
    model_task = check_model_task(model_id)
    if model_task != "text-classification":
        gr.Warning("Please check your model.")
        # Three values, like every other branch; returning only two would not
        # match the number of outputs this function is wired to.
        return (gr.update(interactive=False), gr.update(), "")

    if dataset_config is None or dataset_split is None or len(dataset_config) == 0:
        return (gr.update(), gr.update(), "")

    try:
        ds = datasets.load_dataset(dataset_id, dataset_config)
        df: pd.DataFrame = ds[dataset_split].to_pandas().head(5)
        ds_labels, ds_features = get_labels_and_features_from_dataset(ds[dataset_split])

        if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
            gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
            return (gr.update(interactive=False), gr.update(value=df, visible=True), "")

        return (gr.update(interactive=True), gr.update(value=df, visible=True), "")
    except Exception as e:
        # Config or split wrong
        gr.Warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
        return (gr.update(interactive=False), gr.update(value=pd.DataFrame(), visible=False), "")
|
| 201 |
+
|
| 202 |
+
|
| 203 |
|
| 204 |
|
| 205 |
def align_columns_and_show_prediction(
|
| 206 |
model_id, dataset_id, dataset_config, dataset_split, uid, run_inference, inference_token
|
| 207 |
):
|
| 208 |
+
model_task = check_model_task(model_id)
|
| 209 |
+
if model_task is None or model_task != "text-classification":
|
| 210 |
gr.Warning("Please check your model.")
|
| 211 |
return (
|
| 212 |
gr.update(visible=False),
|
|
|
|
| 221 |
gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)
|
| 222 |
]
|
| 223 |
|
| 224 |
+
prediction_input, prediction_output = get_example_prediction(
|
| 225 |
+
model_id, dataset_id, dataset_config, dataset_split
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
)
|
| 227 |
|
| 228 |
+
model_labels = list(prediction_output.keys())
|
| 229 |
+
|
| 230 |
+
ds = datasets.load_dataset(dataset_id, dataset_config)[dataset_split]
|
| 231 |
+
ds_labels, ds_features = get_labels_and_features_from_dataset(ds)
|
| 232 |
+
|
| 233 |
# when dataset does not have labels or features
|
| 234 |
if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
|
| 235 |
gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
|
|
|
|
| 245 |
column_mappings = list_labels_and_features_from_dataset(
|
| 246 |
ds_labels,
|
| 247 |
ds_features,
|
| 248 |
+
model_labels,
|
| 249 |
uid,
|
| 250 |
)
|
| 251 |
|
| 252 |
# when labels or features are not aligned
|
| 253 |
# show manually column mapping
|
| 254 |
if (
|
| 255 |
+
collections.Counter(model_labels) != collections.Counter(ds_labels)
|
| 256 |
or ds_features[0] != "text"
|
| 257 |
):
|
| 258 |
return (
|
|
|
|
| 264 |
*column_mappings,
|
| 265 |
)
|
| 266 |
|
|
|
|
|
|
|
|
|
|
| 267 |
return (
|
| 268 |
gr.update(value=get_styled_input(prediction_input), visible=True),
|
| 269 |
gr.update(value=prediction_output, visible=True),
|
|
|
|
| 307 |
if os.environ.get("SPACE_ID") == "giskardai/giskard-evaluator":
|
| 308 |
leaderboard_dataset = LEADERBOARD
|
| 309 |
|
| 310 |
+
if inference:
|
|
|
|
| 311 |
inference_type = "hf_inference_api"
|
| 312 |
|
| 313 |
+
|
| 314 |
# TODO: Set column mapping for some dataset such as `amazon_polarity`
|
| 315 |
command = [
|
| 316 |
"giskard_scanner",
|
|
|
|
| 339 |
"--inference_api_token",
|
| 340 |
inference_token,
|
| 341 |
]
|
| 342 |
+
|
| 343 |
# The token to publish post
|
| 344 |
if os.environ.get(HF_WRITE_TOKEN):
|
| 345 |
command.append("--hf_token")
|
wordings.py
CHANGED
|
@@ -38,7 +38,26 @@ MAPPING_STYLED_ERROR_WARNING = """
|
|
| 38 |
</h3>
|
| 39 |
"""
|
| 40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
def get_styled_input(input):
|
| 42 |
-
return f"""<h3 style="text-align: center;color: #
|
| 43 |
Sample input: {input}
|
| 44 |
</h3>"""
|
|
|
|
| 38 |
</h3>
|
| 39 |
"""
|
| 40 |
|
| 41 |
+
USE_INFERENCE_API_TIP = """
|
| 42 |
+
We recommend to use
|
| 43 |
+
<a href="https://huggingface.co/docs/api-inference/detailed_parameters#text-classification-task">
|
| 44 |
+
Hugging Face Inference API
|
| 45 |
+
</a>
|
| 46 |
+
for the evaluation,
|
| 47 |
+
which requires your <a href="https://huggingface.co/settings/tokens">HF token</a>.
|
| 48 |
+
<br/>
|
| 49 |
+
Otherwise, an
|
| 50 |
+
<a href="https://huggingface.co/docs/transformers/main_classes/pipelines#transformers.TextClassificationPipeline">
|
| 51 |
+
HF pipeline
|
| 52 |
+
</a>
|
| 53 |
+
will be created and run in this Space. It takes more time to get the result.
|
| 54 |
+
<br/>
|
| 55 |
+
<b>
|
| 56 |
+
Do not worry, your HF token is only used in this Space for your evaluation.
|
| 57 |
+
</b>
|
| 58 |
+
"""
|
| 59 |
+
|
| 60 |
def get_styled_input(input):
|
| 61 |
+
return f"""<h3 style="text-align: center;color: #4ca154; background-color: #e2fbe8; border-radius: 8px; padding: 10px; ">
|
| 62 |
Sample input: {input}
|
| 63 |
</h3>"""
|