giskard-evaluator

Running

App Files Files Community

200

weixuan-giskard committed on Jan 4, 2024

Commit

3573a39

1 Parent(s): 970a44b

Format with black and fix import

Browse files

Files changed (12) hide show

app.py +5 -6
app_leaderboard.py +68 -33
app_legacy.py +344 -159
app_text_classification.py +105 -60
fetch_utils.py +11 -4
io_utils.py +22 -9
mlflow_test.py +20 -0
run_jobs.py +10 -5
text_classification.py +83 -42
text_classification_ui_helpers.py +113 -48
validate_queue.py +24 -0
wordings.py +8 -8

app.py CHANGED Viewed

@@ -1,10 +1,11 @@
 import gradio as gr
-import atexit
-from app_text_classification import get_demo as get_demo_text_classification
 from app_leaderboard import get_demo as get_demo_leaderboard
 from run_jobs import start_process_run_job, stop_thread
-import threading
 if threading.current_thread() is not threading.main_thread():
     t = threading.current_thread()
@@ -14,7 +15,7 @@ try:
             get_demo_text_classification(demo)
         with gr.Tab("Leaderboard"):
             get_demo_leaderboard()
     start_process_run_job()
     demo.queue(max_size=100)
@@ -24,5 +25,3 @@ try:
 except Exception:
     print("stop background thread")
     stop_thread()

+import atexit
+import threading
 import gradio as gr
 from app_leaderboard import get_demo as get_demo_leaderboard
+from app_text_classification import get_demo as get_demo_text_classification
 from run_jobs import start_process_run_job, stop_thread
 if threading.current_thread() is not threading.main_thread():
     t = threading.current_thread()
             get_demo_text_classification(demo)
         with gr.Tab("Leaderboard"):
             get_demo_leaderboard()
     start_process_run_job()
     demo.queue(max_size=100)
 except Exception:
     print("stop background thread")
     stop_thread()

app_leaderboard.py CHANGED Viewed

@@ -1,8 +1,11 @@
-import gradio as gr
-import datasets
 import logging
 from fetch_utils import check_dataset_and_get_config, check_dataset_and_get_split
 def get_records_from_dataset_repo(dataset_id):
     dataset_config = check_dataset_and_get_config(dataset_id)
@@ -15,83 +18,115 @@ def get_records_from_dataset_repo(dataset_id):
         df = ds.to_pandas()
         return df
     except Exception as e:
-        logging.warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
         return None
 def get_model_ids(ds):
     logging.info(f"Dataset {ds} column names: {ds['model_id']}")
-    models = ds['model_id'].tolist()
     # return unique elements in the list model_ids
     model_ids = list(set(models))
     return model_ids
 def get_dataset_ids(ds):
     logging.info(f"Dataset {ds} column names: {ds['dataset_id']}")
-    datasets = ds['dataset_id'].tolist()
     dataset_ids = list(set(datasets))
     return dataset_ids
 def get_types(ds):
     # set types for each column
     types = [str(t) for t in ds.dtypes.to_list()]
-    types = [t.replace('object', 'markdown') for t in types]
-    types = [t.replace('float64', 'number') for t in types]
-    types = [t.replace('int64', 'number') for t in types]
     return types
 def get_display_df(df):
     # style all elements in the model_id column
     display_df = df.copy()
     columns = display_df.columns.tolist()
-    if 'model_id' in columns:
-        display_df['model_id'] = display_df['model_id'].apply(lambda x: f'<p href="https://huggingface.co/{x}" style="color:blue">🔗{x}</p>')
     # style all elements in the dataset_id column
-    if 'dataset_id' in columns:
-        display_df['dataset_id'] = display_df['dataset_id'].apply(lambda x: f'<p href="https://huggingface.co/datasets/{x}" style="color:blue">🔗{x}</p>')
     # style all elements in the report_link column
-    if 'report_link' in columns:
-        display_df['report_link'] = display_df['report_link'].apply(lambda x: f'<p href="{x}" style="color:blue">🔗{x}</p>')
     return display_df
 def get_demo():
-    records = get_records_from_dataset_repo('ZeroCommand/test-giskard-report')
     model_ids = get_model_ids(records)
     dataset_ids = get_dataset_ids(records)
     column_names = records.columns.tolist()
-    default_columns = ['model_id', 'dataset_id', 'total_issues', 'report_link']
-    default_df = records[default_columns] # extract columns selected
     types = get_types(default_df)
-    display_df = get_display_df(default_df) # the styled dataframe to display
     with gr.Row():
-        task_select = gr.Dropdown(label='Task', choices=['text_classification', 'tabular'], value='text_classification', interactive=True)
-        model_select = gr.Dropdown(label='Model id', choices=model_ids, interactive=True)
-        dataset_select = gr.Dropdown(label='Dataset id', choices=dataset_ids, interactive=True)
     with gr.Row():
-        columns_select = gr.CheckboxGroup(label='Show columns', choices=column_names, value=default_columns, interactive=True)
     with gr.Row():
         leaderboard_df = gr.DataFrame(display_df, datatype=types, interactive=False)
-    @gr.on(triggers=[model_select.change, dataset_select.change, columns_select.change, task_select.change],
-           inputs=[model_select, dataset_select, columns_select, task_select],
-           outputs=[leaderboard_df])
     def filter_table(model_id, dataset_id, columns, task):
         # filter the table based on task
-        df = records[(records['task'] == task)]
         # filter the table based on the model_id and dataset_id
         if model_id:
-            df = records[(records['model_id'] == model_id)]
         if dataset_id:
-            df = records[(records['dataset_id'] == dataset_id)]
         # filter the table based on the columns
         df = df[columns]
         types = get_types(df)
         display_df = get_display_df(df)
-        return (
-            gr.update(value=display_df, datatype=types, interactive=False)
-        )

 import logging
+import datasets
+import gradio as gr
 from fetch_utils import check_dataset_and_get_config, check_dataset_and_get_split
 def get_records_from_dataset_repo(dataset_id):
     dataset_config = check_dataset_and_get_config(dataset_id)
         df = ds.to_pandas()
         return df
     except Exception as e:
+        logging.warning(
+            f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}"
+        )
         return None
 def get_model_ids(ds):
     """Return the distinct model ids found in the ``model_id`` column of ``ds``.

     Args:
         ds: Table-like object whose ``model_id`` column exposes ``tolist()``
             (``get_demo`` passes the records DataFrame).

     Returns:
         list: Unique model ids, in the order they first appear in the column.
     """
     logging.info(f"Dataset {ds} column names: {ds['model_id']}")
     # dict.fromkeys de-duplicates while keeping first-seen order; list(set(...))
     # orders string ids differently from one interpreter run to the next,
     # which reshuffles the dropdown choices on every restart.
     return list(dict.fromkeys(ds["model_id"].tolist()))
 def get_dataset_ids(ds):
     """Return the distinct dataset ids found in the ``dataset_id`` column of ``ds``.

     Args:
         ds: Table-like object whose ``dataset_id`` column exposes ``tolist()``
             (``get_demo`` passes the records DataFrame).

     Returns:
         list: Unique dataset ids, in the order they first appear in the column.
     """
     logging.info(f"Dataset {ds} column names: {ds['dataset_id']}")
     # Named `all_ids` rather than `datasets` so the imported `datasets` module
     # is not shadowed inside this function.
     all_ids = ds["dataset_id"].tolist()
     # Order-preserving de-duplication keeps the dropdown choices stable
     # between runs (set ordering of strings is not).
     return list(dict.fromkeys(all_ids))
 def get_types(ds):
     """Map every column dtype of ``ds`` to a datatype name for the table.

     ``object`` columns become ``"markdown"``; integer, unsigned and floating
     columns of any width become ``"number"``; every other dtype keeps its
     string name unchanged.

     Args:
         ds: DataFrame whose ``dtypes`` are inspected.

     Returns:
         list[str]: One datatype name per column, in column order.
     """
     types = []
     for dtype in ds.dtypes.to_list():
         name = str(dtype)
         if name == "object":
             types.append("markdown")
         elif getattr(dtype, "kind", None) in ("i", "u", "f"):
             # Classify by dtype kind instead of substring replacement:
             # "uint64".replace("int64", "number") used to yield "unumber",
             # and widths such as float32/int32 were never translated.
             types.append("number")
         else:
             types.append(name)
     return types
 def get_display_df(df):
     """Return a copy of ``df`` whose id/link columns are rendered as hyperlinks.

     The ``model_id``, ``dataset_id`` and ``report_link`` columns, when present,
     are replaced by HTML anchors; all other columns and the input frame itself
     are left untouched.

     Args:
         df: DataFrame to style.

     Returns:
         A new DataFrame with the link columns converted to HTML strings.
     """
     import html  # function-scope import keeps this block self-contained

     # Column name -> URL prefix placed in front of the cell value.
     link_prefixes = {
         "model_id": "https://huggingface.co/",
         "dataset_id": "https://huggingface.co/datasets/",
         "report_link": "",
     }

     def as_link(url, text):
         # An anchor is required for a clickable link: `href` on a <p> element
         # does nothing. Values are escaped because they are interpolated into
         # HTML straight from the records table.
         safe_url = html.escape(str(url), quote=True)
         safe_text = html.escape(str(text))
         return f'<a href="{safe_url}" style="color:blue">🔗{safe_text}</a>'

     display_df = df.copy()
     for column, prefix in link_prefixes.items():
         if column in display_df.columns:
             display_df[column] = display_df[column].apply(
                 # Bind `prefix` as a default to avoid late-binding in the loop.
                 lambda x, prefix=prefix: as_link(f"{prefix}{x}", x)
             )
     return display_df
 def get_demo():
     """Build the leaderboard UI: filter controls plus the results table.

     Loads the report records from the "ZeroCommand/test-giskard-report"
     dataset repo, shows a default column selection, and registers a callback
     that re-filters the table whenever the task, model, dataset or column
     selection changes. app.py calls this inside a ``gr.Tab`` context.
     """
     # NOTE(review): get_records_from_dataset_repo returns None when loading
     # fails, in which case the calls below raise; confirm whether a fallback
     # UI is wanted here.
     records = get_records_from_dataset_repo("ZeroCommand/test-giskard-report")

     model_ids = get_model_ids(records)
     dataset_ids = get_dataset_ids(records)

     column_names = records.columns.tolist()
     default_columns = ["model_id", "dataset_id", "total_issues", "report_link"]
     default_df = records[default_columns]  # extract columns selected
     types = get_types(default_df)
     display_df = get_display_df(default_df)  # the styled dataframe to display

     with gr.Row():
         task_select = gr.Dropdown(
             label="Task",
             choices=["text_classification", "tabular"],
             value="text_classification",
             interactive=True,
         )
         model_select = gr.Dropdown(
             label="Model id", choices=model_ids, interactive=True
         )
         dataset_select = gr.Dropdown(
             label="Dataset id", choices=dataset_ids, interactive=True
         )

     with gr.Row():
         columns_select = gr.CheckboxGroup(
             label="Show columns",
             choices=column_names,
             value=default_columns,
             interactive=True,
         )

     with gr.Row():
         leaderboard_df = gr.DataFrame(display_df, datatype=types, interactive=False)

     @gr.on(
         triggers=[
             model_select.change,
             dataset_select.change,
             columns_select.change,
             task_select.change,
         ],
         inputs=[model_select, dataset_select, columns_select, task_select],
         outputs=[leaderboard_df],
     )
     def filter_table(model_id, dataset_id, columns, task):
         """Return a table update restricted to the selected task/model/dataset."""
         # filter the table based on task
         df = records[records["task"] == task]
         # Narrow the already-filtered frame. Re-slicing `records` here used to
         # throw away the task filter (and the model filter when a dataset was
         # also chosen), so only the last active filter took effect.
         if model_id:
             df = df[df["model_id"] == model_id]
         if dataset_id:
             df = df[df["dataset_id"] == dataset_id]

         # filter the table based on the columns
         df = df[columns]
         types = get_types(df)
         display_df = get_display_df(df)
         return gr.update(value=display_df, datatype=types, interactive=False)

app_legacy.py CHANGED Viewed

@@ -1,22 +1,31 @@
-import gradio as gr
-import datasets
-import huggingface_hub
 import os
-import time
 import subprocess
-import logging
-import json
 from transformers.pipelines import TextClassificationPipeline
-from text_classification import check_column_mapping_keys_validity, text_classification_fix_column_mapping
-from io_utils import read_scanners, write_scanners, read_inference_type, write_inference_type, convert_column_mapping_to_json
-from wordings import CONFIRM_MAPPING_DETAILS_MD, CONFIRM_MAPPING_DETAILS_FAIL_MD
-HF_REPO_ID = 'HF_REPO_ID'
-HF_SPACE_ID = 'SPACE_ID'
-HF_WRITE_TOKEN = 'HF_WRITE_TOKEN'
 def check_model(model_id):
     try:
@@ -26,6 +35,7 @@ def check_model(model_id):
     try:
         from transformers import pipeline
         ppl = pipeline(task=task, model=model_id)
         return model_id, ppl
@@ -55,55 +65,70 @@ def check_dataset(dataset_id, dataset_config="default", dataset_split="test"):
         return dataset_id, None, None
     return dataset_id, dataset_config, dataset_split
-def try_validate(m_id, ppl, dataset_id, dataset_config, dataset_split, column_mapping='{}'):
     # Validate model
     if m_id is None:
-        gr.Warning('Model is not accessible. Please set your HF_TOKEN if it is a private model.')
         return (
-            gr.update(interactive=False),   # Submit button
-            gr.update(visible=True),       # Loading row
-            gr.update(visible=False),        # Preview row
-            gr.update(visible=False),       # Model prediction input
-            gr.update(visible=False),       # Model prediction preview
-            gr.update(visible=False),       # Label mapping preview
-            gr.update(visible=False),       # feature mapping preview
         )
     if isinstance(ppl, Exception):
         gr.Warning(f'Failed to load model": {ppl}')
         return (
-            gr.update(interactive=False),   # Submit button
-            gr.update(visible=True),       # Loading row
-            gr.update(visible=False),        # Preview row
-            gr.update(visible=False),       # Model prediction input
-            gr.update(visible=False),       # Model prediction preview
-            gr.update(visible=False),       # Label mapping preview
-            gr.update(visible=False),       # feature mapping preview
         )
     # Validate dataset
-    d_id, config, split = check_dataset(dataset_id=dataset_id, dataset_config=dataset_config, dataset_split=dataset_split)
     dataset_ok = False
     if d_id is None:
-        gr.Warning(f'Dataset "{dataset_id}" is not accessible. Please set your HF_TOKEN if it is a private dataset.')
     elif isinstance(config, list):
-        gr.Warning(f'Dataset "{dataset_id}" does not have "{dataset_config}" config. Please choose a valid config.')
         config = gr.update(choices=config, value=config[0])
     elif isinstance(split, list):
-        gr.Warning(f'Dataset "{dataset_id}" does not have "{dataset_split}" split. Please choose a valid split.')
         split = gr.update(choices=split, value=split[0])
     else:
         dataset_ok = True
     if not dataset_ok:
         return (
-            gr.update(interactive=False),   # Submit button
-            gr.update(visible=True),        # Loading row
-            gr.update(visible=False),       # Preview row
-            gr.update(visible=False),       # Model prediction input
-            gr.update(visible=False),       # Model prediction preview
-            gr.update(visible=False),       # Label mapping preview
-            gr.update(visible=False),       # feature mapping preview
         )
     # TODO: Validate column mapping by running once
@@ -115,55 +140,94 @@ def try_validate(m_id, ppl, dataset_id, dataset_config, dataset_split, column_ma
         except Exception:
             column_mapping = {}
-        column_mapping, prediction_input, prediction_result, id2label_df, feature_df = \
-            text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, split)
         column_mapping = json.dumps(column_mapping, indent=2)
     if prediction_result is None and id2label_df is not None:
-        gr.Warning('The model failed to predict with the first row in the dataset. Please provide feature mappings in "Advance" settings.')
         return (
-            gr.update(interactive=False),   # Submit button
-            gr.update(visible=False),       # Loading row
-            gr.update(CONFIRM_MAPPING_DETAILS_MD, visible=True),        # Preview row
-            gr.update(value=f'**Sample Input**: {prediction_input}', visible=True),       # Model prediction input
-            gr.update(visible=False),   # Model prediction preview
-            gr.update(value=id2label_df, visible=True, interactive=True),   # Label mapping preview
-            gr.update(value=feature_df, visible=True, interactive=True),   # feature mapping preview
         )
     elif id2label_df is None:
-        gr.Warning('The prediction result does not conform the labels in the dataset. Please provide label mappings in "Advance" settings.')
         return (
-            gr.update(interactive=False),   # Submit button
-            gr.update(visible=False),       # Loading row
-            gr.update(CONFIRM_MAPPING_DETAILS_MD, visible=True),        # Preview row
-            gr.update(value=f'**Sample Input**: {prediction_input}', visible=True),       # Model prediction input
-            gr.update(value=prediction_result, visible=True),   # Model prediction preview
-            gr.update(visible=True, interactive=True),   # Label mapping preview
-            gr.update(visible=True, interactive=True),   # feature mapping preview
         )
-    gr.Info("Model and dataset validations passed. Your can submit the evaluation task.")
     return (
-        gr.update(interactive=True),    # Submit button
-        gr.update(visible=False),       # Loading row
-        gr.update(CONFIRM_MAPPING_DETAILS_MD, visible=True),        # Preview row
-        gr.update(value=f'**Sample Input**: {prediction_input}', visible=True),       # Model prediction input
-        gr.update(value=prediction_result, visible=True),   # Model prediction preview
-        gr.update(value=id2label_df, visible=True, interactive=True), # Label mapping preview
-        gr.update(value=feature_df, visible=True, interactive=True),   # feature mapping preview
     )
-def try_submit(m_id, d_id, config, split, id2label_mapping_dataframe, feature_mapping_dataframe, local):
     label_mapping = {}
     for i, label in id2label_mapping_dataframe["Model Prediction Labels"].items():
         label_mapping.update({str(i): label})
     feature_mapping = {}
     for i, feature in feature_mapping_dataframe["Dataset Features"].items():
-        feature_mapping.update({feature_mapping_dataframe["Model Input Features"][i]: feature})
     # TODO: Set column mapping for some dataset such as `amazon_polarity`
@@ -171,18 +235,30 @@ def try_submit(m_id, d_id, config, split, id2label_mapping_dataframe, feature_ma
         command = [
             "python",
             "cli.py",
-            "--loader", "huggingface",
-            "--model", m_id,
-            "--dataset", d_id,
-            "--dataset_config", config,
-            "--dataset_split", split,
-            "--hf_token", os.environ.get(HF_WRITE_TOKEN),
-            "--discussion_repo", os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID),
-            "--output_format", "markdown",
-            "--output_portal", "huggingface",
-            "--feature_mapping", json.dumps(feature_mapping),
-            "--label_mapping", json.dumps(label_mapping),
-            "--scan_config", "../config.yaml",
         ]
         eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
@@ -196,12 +272,16 @@ def try_submit(m_id, d_id, config, split, id2label_mapping_dataframe, feature_ma
         )
         result = evaluator.wait()
-        logging.info(f"Finished local evaluation exit code {result} on {eval_str}: {time.time() - start:.2f}s")
-        gr.Info(f"Finished local evaluation exit code {result} on {eval_str}: {time.time() - start:.2f}s")
     else:
         gr.Info("TODO: Submit task to an endpoint")
     return gr.update(interactive=True)  # Submit button
@@ -224,56 +304,82 @@ def get_demo():
             return gr.Dropdown(splits, value=splits[0], visible=True)
         except Exception as e:
             # Dataset may not exist
-            gr.Warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
-            pass
     def clear_column_mapping_tables():
         return [
             gr.update(CONFIRM_MAPPING_DETAILS_FAIL_MD, visible=True),
             gr.update(value=[], visible=False, interactive=True),
             gr.update(value=[], visible=False, interactive=True),
         ]
-    def gate_validate_btn(model_id, dataset_id, dataset_config, dataset_split, id2label_mapping_dataframe=None, feature_mapping_dataframe=None):
-        column_mapping = '{}'
         _, ppl = check_model(model_id=model_id)
         if id2label_mapping_dataframe is not None:
-            labels = convert_column_mapping_to_json(id2label_mapping_dataframe.value, label="data")
-            features = convert_column_mapping_to_json(feature_mapping_dataframe.value, label="text")
             column_mapping = json.dumps({**labels, **features}, indent=2)
         if check_column_mapping_keys_validity(column_mapping, ppl) is False:
-            gr.Warning('Label mapping table has invalid contents. Please check again.')
-            return (gr.update(interactive=False),
-                    gr.update(CONFIRM_MAPPING_DETAILS_FAIL_MD, visible=True),
-                    gr.update(),
-                    gr.update(),
-                    gr.update(),
-                    gr.update(),
-                    gr.update())
         else:
             if model_id and dataset_id and dataset_config and dataset_split:
-                return try_validate(model_id, ppl, dataset_id, dataset_config, dataset_split, column_mapping)
             else:
-                return (gr.update(interactive=False),
-                        gr.update(visible=True),
-                        gr.update(visible=False),
-                        gr.update(visible=False),
-                        gr.update(visible=False),
-                        gr.update(visible=False),
-                        gr.update(visible=False))
     with gr.Row():
         gr.Markdown(CONFIRM_MAPPING_DETAILS_MD)
     with gr.Row():
         run_local = gr.Checkbox(value=True, label="Run in this Space")
-        use_inference = read_inference_type('./config.yaml') == 'hf_inference_api'
         run_inference = gr.Checkbox(value=use_inference, label="Run with Inference API")
     with gr.Row():
-        selected = read_scanners('./config.yaml')
-        scan_config = selected + ['data_leakage']
-        scanners = gr.CheckboxGroup(choices=scan_config, value=selected, label='Scan Settings', visible=True)
     with gr.Row():
         model_id_input = gr.Textbox(
@@ -286,75 +392,154 @@ def get_demo():
             placeholder="tweet_eval",
         )
     with gr.Row():
-        dataset_config_input = gr.Dropdown(label='Dataset Config', visible=False)
-        dataset_split_input = gr.Dropdown(label='Dataset Split', visible=False)
     with gr.Row(visible=True) as loading_row:
-        gr.Markdown('''
                     <p style="text-align: center;">
                     🚀🐢Please validate your model and dataset first...
                     </p>
-                    ''')
     with gr.Row(visible=False) as preview_row:
-        gr.Markdown('''
             <h1 style="text-align: center;">
             Confirm Pre-processing Details
             </h1>
             Base on your model and dataset, we inferred this label mapping and feature mapping. <b>If the mapping is incorrect, please modify it in the table below.</b>
-            ''')
     with gr.Row():
-        id2label_mapping_dataframe = gr.DataFrame(label="Preview of label mapping", interactive=True, visible=False)
-        feature_mapping_dataframe = gr.DataFrame(label="Preview of feature mapping", interactive=True, visible=False)
     with gr.Row():
-        example_input = gr.Markdown('Sample Input: ', visible=False)
     with gr.Row():
-        example_labels = gr.Label(label='Model Prediction Sample', visible=False)
     run_btn = gr.Button(
         "Get Evaluation Result",
         variant="primary",
         interactive=False,
         size="lg",
     )
-    model_id_input.blur(clear_column_mapping_tables, outputs=[id2label_mapping_dataframe, feature_mapping_dataframe])
-    dataset_id_input.blur(check_dataset_and_get_config, dataset_id_input, dataset_config_input)
-    dataset_id_input.submit(check_dataset_and_get_config, dataset_id_input, dataset_config_input)
     dataset_config_input.change(
-        check_dataset_and_get_split,
-        inputs=[dataset_config_input, dataset_id_input],
-        outputs=[dataset_split_input])
-    dataset_id_input.blur(clear_column_mapping_tables, outputs=[id2label_mapping_dataframe, feature_mapping_dataframe])
-    # model_id_input.blur(gate_validate_btn,
-    #                         inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
     #                         outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
-    # dataset_id_input.blur(gate_validate_btn,
-    #                         inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
-                            # outputs=[run_btn, loading_row, preview_row, example_input,  example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
-    dataset_config_input.change(gate_validate_btn,
-                            inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
-                            outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
-    dataset_split_input.change(gate_validate_btn,
-                            inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
-                            outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
-    id2label_mapping_dataframe.input(gate_validate_btn,
-                            inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input, id2label_mapping_dataframe, feature_mapping_dataframe],
-                            outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
-    feature_mapping_dataframe.input(gate_validate_btn,
-                            inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input, id2label_mapping_dataframe, feature_mapping_dataframe],
-                            outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
-    scanners.change(write_scanners, inputs=scanners)
-    run_inference.change(
-        write_inference_type,
-        inputs=[run_inference]
     )
     run_btn.click(
         try_submit,
@@ -370,4 +555,4 @@ def get_demo():
         outputs=[
             run_btn,
         ],
-    )

+import json
+import logging
 import os
 import subprocess
+import time
+import datasets
+import gradio as gr
+import huggingface_hub
 from transformers.pipelines import TextClassificationPipeline
+from io_utils import (
+    convert_column_mapping_to_json,
+    read_inference_type,
+    read_scanners,
+    write_inference_type,
+    write_scanners,
+)
+from text_classification import (
+    check_column_mapping_keys_validity,
+    text_classification_fix_column_mapping,
+)
+from wordings import CONFIRM_MAPPING_DETAILS_FAIL_MD, CONFIRM_MAPPING_DETAILS_MD
+HF_REPO_ID = "HF_REPO_ID"
+HF_SPACE_ID = "SPACE_ID"
+HF_WRITE_TOKEN = "HF_WRITE_TOKEN"
 def check_model(model_id):
     try:
     try:
         from transformers import pipeline
         ppl = pipeline(task=task, model=model_id)
         return model_id, ppl
         return dataset_id, None, None
     return dataset_id, dataset_config, dataset_split
+def try_validate(
+    m_id, ppl, dataset_id, dataset_config, dataset_split, column_mapping="{}"
+):
     # Validate model
     if m_id is None:
+        gr.Warning(
+            "Model is not accessible. Please set your HF_TOKEN if it is a private model."
+        )
         return (
+            gr.update(interactive=False),  # Submit button
+            gr.update(visible=True),  # Loading row
+            gr.update(visible=False),  # Preview row
+            gr.update(visible=False),  # Model prediction input
+            gr.update(visible=False),  # Model prediction preview
+            gr.update(visible=False),  # Label mapping preview
+            gr.update(visible=False),  # feature mapping preview
         )
     if isinstance(ppl, Exception):
         gr.Warning(f'Failed to load model": {ppl}')
         return (
+            gr.update(interactive=False),  # Submit button
+            gr.update(visible=True),  # Loading row
+            gr.update(visible=False),  # Preview row
+            gr.update(visible=False),  # Model prediction input
+            gr.update(visible=False),  # Model prediction preview
+            gr.update(visible=False),  # Label mapping preview
+            gr.update(visible=False),  # feature mapping preview
         )
     # Validate dataset
+    d_id, config, split = check_dataset(
+        dataset_id=dataset_id,
+        dataset_config=dataset_config,
+        dataset_split=dataset_split,
+    )
     dataset_ok = False
     if d_id is None:
+        gr.Warning(
+            f'Dataset "{dataset_id}" is not accessible. Please set your HF_TOKEN if it is a private dataset.'
+        )
     elif isinstance(config, list):
+        gr.Warning(
+            f'Dataset "{dataset_id}" does not have "{dataset_config}" config. Please choose a valid config.'
+        )
         config = gr.update(choices=config, value=config[0])
     elif isinstance(split, list):
+        gr.Warning(
+            f'Dataset "{dataset_id}" does not have "{dataset_split}" split. Please choose a valid split.'
+        )
         split = gr.update(choices=split, value=split[0])
     else:
         dataset_ok = True
     if not dataset_ok:
         return (
+            gr.update(interactive=False),  # Submit button
+            gr.update(visible=True),  # Loading row
+            gr.update(visible=False),  # Preview row
+            gr.update(visible=False),  # Model prediction input
+            gr.update(visible=False),  # Model prediction preview
+            gr.update(visible=False),  # Label mapping preview
+            gr.update(visible=False),  # feature mapping preview
         )
     # TODO: Validate column mapping by running once
         except Exception:
             column_mapping = {}
+        (
+            column_mapping,
+            prediction_input,
+            prediction_result,
+            id2label_df,
+            feature_df,
+        ) = text_classification_fix_column_mapping(
+            column_mapping, ppl, d_id, config, split
+        )
         column_mapping = json.dumps(column_mapping, indent=2)
     if prediction_result is None and id2label_df is not None:
+        gr.Warning(
+            'The model failed to predict with the first row in the dataset. Please provide feature mappings in "Advance" settings.'
+        )
         return (
+            gr.update(interactive=False),  # Submit button
+            gr.update(visible=False),  # Loading row
+            gr.update(CONFIRM_MAPPING_DETAILS_MD, visible=True),  # Preview row
+            gr.update(
+                value=f"**Sample Input**: {prediction_input}", visible=True
+            ),  # Model prediction input
+            gr.update(visible=False),  # Model prediction preview
+            gr.update(
+                value=id2label_df, visible=True, interactive=True
+            ),  # Label mapping preview
+            gr.update(
+                value=feature_df, visible=True, interactive=True
+            ),  # feature mapping preview
         )
     elif id2label_df is None:
+        gr.Warning(
+            'The prediction result does not conform the labels in the dataset. Please provide label mappings in "Advance" settings.'
+        )
         return (
+            gr.update(interactive=False),  # Submit button
+            gr.update(visible=False),  # Loading row
+            gr.update(CONFIRM_MAPPING_DETAILS_MD, visible=True),  # Preview row
+            gr.update(
+                value=f"**Sample Input**: {prediction_input}", visible=True
+            ),  # Model prediction input
+            gr.update(
+                value=prediction_result, visible=True
+            ),  # Model prediction preview
+            gr.update(visible=True, interactive=True),  # Label mapping preview
+            gr.update(visible=True, interactive=True),  # feature mapping preview
         )
+    gr.Info(
+        "Model and dataset validations passed. Your can submit the evaluation task."
+    )
     return (
+        gr.update(interactive=True),  # Submit button
+        gr.update(visible=False),  # Loading row
+        gr.update(CONFIRM_MAPPING_DETAILS_MD, visible=True),  # Preview row
+        gr.update(
+            value=f"**Sample Input**: {prediction_input}", visible=True
+        ),  # Model prediction input
+        gr.update(value=prediction_result, visible=True),  # Model prediction preview
+        gr.update(
+            value=id2label_df, visible=True, interactive=True
+        ),  # Label mapping preview
+        gr.update(
+            value=feature_df, visible=True, interactive=True
+        ),  # feature mapping preview
     )
+def try_submit(
+    m_id,
+    d_id,
+    config,
+    split,
+    id2label_mapping_dataframe,
+    feature_mapping_dataframe,
+    local,
+):
     label_mapping = {}
     for i, label in id2label_mapping_dataframe["Model Prediction Labels"].items():
         label_mapping.update({str(i): label})
     feature_mapping = {}
     for i, feature in feature_mapping_dataframe["Dataset Features"].items():
+        feature_mapping.update(
+            {feature_mapping_dataframe["Model Input Features"][i]: feature}
+        )
     # TODO: Set column mapping for some dataset such as `amazon_polarity`
         command = [
             "python",
             "cli.py",
+            "--loader",
+            "huggingface",
+            "--model",
+            m_id,
+            "--dataset",
+            d_id,
+            "--dataset_config",
+            config,
+            "--dataset_split",
+            split,
+            "--hf_token",
+            os.environ.get(HF_WRITE_TOKEN),
+            "--discussion_repo",
+            os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID),
+            "--output_format",
+            "markdown",
+            "--output_portal",
+            "huggingface",
+            "--feature_mapping",
+            json.dumps(feature_mapping),
+            "--label_mapping",
+            json.dumps(label_mapping),
+            "--scan_config",
+            "../config.yaml",
         ]
         eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
         )
         result = evaluator.wait()
+        logging.info(
+            f"Finished local evaluation exit code {result} on {eval_str}: {time.time() - start:.2f}s"
+        )
+        gr.Info(
+            f"Finished local evaluation exit code {result} on {eval_str}: {time.time() - start:.2f}s"
+        )
     else:
         gr.Info("TODO: Submit task to an endpoint")
     return gr.update(interactive=True)  # Submit button
             return gr.Dropdown(splits, value=splits[0], visible=True)
         except Exception as e:
             # Dataset may not exist
+            gr.Warning(
+                f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}"
+            )
     def clear_column_mapping_tables():
         # Reset callback: the first update carries CONFIRM_MAPPING_DETAILS_FAIL_MD
         # with visible=True; the other two empty and hide a component while
         # keeping it interactive.
         # NOTE(review): three updates are returned, but the blur handlers
         # visible in this file wire this callback to only two outputs (the two
         # mapping dataframes) - confirm the intended output list.
         # NOTE(review): the markdown constant is passed positionally to
         # gr.update rather than as value= - confirm this is intended.
         return [
             gr.update(CONFIRM_MAPPING_DETAILS_FAIL_MD, visible=True),
             gr.update(value=[], visible=False, interactive=True),
             gr.update(value=[], visible=False, interactive=True),
         ]
+    def gate_validate_btn(
+        model_id,
+        dataset_id,
+        dataset_config,
+        dataset_split,
+        id2label_mapping_dataframe=None,
+        feature_mapping_dataframe=None,
+    ):
+        column_mapping = "{}"
         _, ppl = check_model(model_id=model_id)
         if id2label_mapping_dataframe is not None:
+            labels = convert_column_mapping_to_json(
+                id2label_mapping_dataframe.value, label="data"
+            )
+            features = convert_column_mapping_to_json(
+                feature_mapping_dataframe.value, label="text"
+            )
             column_mapping = json.dumps({**labels, **features}, indent=2)
         if check_column_mapping_keys_validity(column_mapping, ppl) is False:
+            gr.Warning("Label mapping table has invalid contents. Please check again.")
+            return (
+                gr.update(interactive=False),
+                gr.update(CONFIRM_MAPPING_DETAILS_FAIL_MD, visible=True),
+                gr.update(),
+                gr.update(),
+                gr.update(),
+                gr.update(),
+                gr.update(),
+            )
         else:
             if model_id and dataset_id and dataset_config and dataset_split:
+                return try_validate(
+                    model_id,
+                    ppl,
+                    dataset_id,
+                    dataset_config,
+                    dataset_split,
+                    column_mapping,
+                )
             else:
+                return (
+                    gr.update(interactive=False),
+                    gr.update(visible=True),
+                    gr.update(visible=False),
+                    gr.update(visible=False),
+                    gr.update(visible=False),
+                    gr.update(visible=False),
+                    gr.update(visible=False),
+                )
     with gr.Row():
         gr.Markdown(CONFIRM_MAPPING_DETAILS_MD)
     with gr.Row():
         run_local = gr.Checkbox(value=True, label="Run in this Space")
+        use_inference = read_inference_type("./config.yaml") == "hf_inference_api"
         run_inference = gr.Checkbox(value=use_inference, label="Run with Inference API")
     with gr.Row():
+        selected = read_scanners("./config.yaml")
+        scan_config = selected + ["data_leakage"]
+        scanners = gr.CheckboxGroup(
+            choices=scan_config, value=selected, label="Scan Settings", visible=True
+        )
     with gr.Row():
         model_id_input = gr.Textbox(
             placeholder="tweet_eval",
         )
     with gr.Row():
+        dataset_config_input = gr.Dropdown(label="Dataset Config", visible=False)
+        dataset_split_input = gr.Dropdown(label="Dataset Split", visible=False)
     with gr.Row(visible=True) as loading_row:
+        gr.Markdown(
+            """
                     <p style="text-align: center;">
                     🚀🐢Please validate your model and dataset first...
                     </p>
+                    """
+        )
     with gr.Row(visible=False) as preview_row:
+        gr.Markdown(
+            """
             <h1 style="text-align: center;">
             Confirm Pre-processing Details
             </h1>
             Base on your model and dataset, we inferred this label mapping and feature mapping. <b>If the mapping is incorrect, please modify it in the table below.</b>
+            """
+        )
     with gr.Row():
+        id2label_mapping_dataframe = gr.DataFrame(
+            label="Preview of label mapping", interactive=True, visible=False
+        )
+        feature_mapping_dataframe = gr.DataFrame(
+            label="Preview of feature mapping", interactive=True, visible=False
+        )
     with gr.Row():
+        example_input = gr.Markdown("Sample Input: ", visible=False)
     with gr.Row():
+        example_labels = gr.Label(label="Model Prediction Sample", visible=False)
     run_btn = gr.Button(
         "Get Evaluation Result",
         variant="primary",
         interactive=False,
         size="lg",
     )
+    model_id_input.blur(
+        clear_column_mapping_tables,
+        outputs=[id2label_mapping_dataframe, feature_mapping_dataframe],
+    )
+    dataset_id_input.blur(
+        check_dataset_and_get_config, dataset_id_input, dataset_config_input
+    )
+    dataset_id_input.submit(
+        check_dataset_and_get_config, dataset_id_input, dataset_config_input
+    )
     dataset_config_input.change(
+        check_dataset_and_get_split,
+        inputs=[dataset_config_input, dataset_id_input],
+        outputs=[dataset_split_input],
+    )
+    dataset_id_input.blur(
+        clear_column_mapping_tables,
+        outputs=[id2label_mapping_dataframe, feature_mapping_dataframe],
+    )
+    # model_id_input.blur(gate_validate_btn,
+    #                         inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
     #                         outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
+    # dataset_id_input.blur(gate_validate_btn,
+    #                         inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
+    # outputs=[run_btn, loading_row, preview_row, example_input,  example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
+    dataset_config_input.change(
+        gate_validate_btn,
+        inputs=[
+            model_id_input,
+            dataset_id_input,
+            dataset_config_input,
+            dataset_split_input,
+        ],
+        outputs=[
+            run_btn,
+            loading_row,
+            preview_row,
+            example_input,
+            example_labels,
+            id2label_mapping_dataframe,
+            feature_mapping_dataframe,
+        ],
     )
+    dataset_split_input.change(
+        gate_validate_btn,
+        inputs=[
+            model_id_input,
+            dataset_id_input,
+            dataset_config_input,
+            dataset_split_input,
+        ],
+        outputs=[
+            run_btn,
+            loading_row,
+            preview_row,
+            example_input,
+            example_labels,
+            id2label_mapping_dataframe,
+            feature_mapping_dataframe,
+        ],
+    )
+    id2label_mapping_dataframe.input(
+        gate_validate_btn,
+        inputs=[
+            model_id_input,
+            dataset_id_input,
+            dataset_config_input,
+            dataset_split_input,
+            id2label_mapping_dataframe,
+            feature_mapping_dataframe,
+        ],
+        outputs=[
+            run_btn,
+            loading_row,
+            preview_row,
+            example_input,
+            example_labels,
+            id2label_mapping_dataframe,
+            feature_mapping_dataframe,
+        ],
+    )
+    feature_mapping_dataframe.input(
+        gate_validate_btn,
+        inputs=[
+            model_id_input,
+            dataset_id_input,
+            dataset_config_input,
+            dataset_split_input,
+            id2label_mapping_dataframe,
+            feature_mapping_dataframe,
+        ],
+        outputs=[
+            run_btn,
+            loading_row,
+            preview_row,
+            example_input,
+            example_labels,
+            id2label_mapping_dataframe,
+            feature_mapping_dataframe,
+        ],
+    )
+    scanners.change(write_scanners, inputs=scanners)
+    run_inference.change(write_inference_type, inputs=[run_inference])
     run_btn.click(
         try_submit,
         outputs=[
             run_btn,
         ],
+    )

app_text_classification.py CHANGED Viewed

@@ -1,15 +1,28 @@
 import gradio as gr
 import uuid
-from io_utils import read_scanners, write_scanners, read_inference_type, write_inference_type, get_logs_file
 from wordings import INTRODUCTION_MD, CONFIRM_MAPPING_DETAILS_MD
-from text_classification_ui_helpers import try_submit, check_dataset_and_get_config, check_dataset_and_get_split, check_model_and_show_prediction, write_column_mapping_to_config
 MAX_LABELS = 20
 MAX_FEATURES = 20
-EXAMPLE_MODEL_ID = 'cardiffnlp/twitter-roberta-base-sentiment-latest'
-EXAMPLE_DATA_ID = 'tweet_eval'
-CONFIG_PATH='./config.yaml'
 def get_demo(demo):
     with gr.Row():
@@ -24,18 +37,20 @@ def get_demo(demo):
             label="Hugging Face Dataset id",
             placeholder=EXAMPLE_DATA_ID + " (press enter to confirm)",
         )
     with gr.Row():
-        dataset_config_input = gr.Dropdown(label='Dataset Config', visible=False)
-        dataset_split_input = gr.Dropdown(label='Dataset Split', visible=False)
     with gr.Row():
-        example_input = gr.Markdown('Example Input', visible=False)
     with gr.Row():
-        example_prediction = gr.Label(label='Model Prediction Sample', visible=False)
     with gr.Row():
-        with gr.Accordion(label='Label and Feature Mapping', visible=False, open=False) as column_mapping_accordion:
             with gr.Row():
                 gr.Markdown(CONFIRM_MAPPING_DETAILS_MD)
             column_mappings = []
@@ -43,22 +58,24 @@ def get_demo(demo):
                 with gr.Column():
                     for _ in range(MAX_LABELS):
                         column_mappings.append(gr.Dropdown(visible=False))
-                with gr.Column():
                     for _ in range(MAX_LABELS, MAX_LABELS + MAX_FEATURES):
                         column_mappings.append(gr.Dropdown(visible=False))
-    with gr.Accordion(label='Model Wrap Advance Config (optional)', open=False):
         run_local = gr.Checkbox(value=True, label="Run in this Space")
-        use_inference = read_inference_type('./config.yaml') == 'hf_inference_api'
         run_inference = gr.Checkbox(value=use_inference, label="Run with Inference API")
-    with gr.Accordion(label='Scanner Advance Config (optional)', open=False):
-        selected = read_scanners('./config.yaml')
         # currently we remove data_leakage from the default scanners
         # Reason: data_leakage barely raises any issues and takes too many requests
         # when using inference API, causing rate limit error
-        scan_config = selected + ['data_leakage']
-        scanners = gr.CheckboxGroup(choices=scan_config, value=selected, label='Scan Settings', visible=True)
     with gr.Row():
         run_btn = gr.Button(
@@ -67,69 +84,97 @@ def get_demo(demo):
             interactive=True,
             size="lg",
         )
     with gr.Row():
         uid = uuid.uuid4()
-        uid_label = gr.Textbox(label="Evaluation ID:", value=uid, visible=False, interactive=False)
         logs = gr.Textbox(label="Giskard Bot Evaluation Log:", visible=False)
         demo.load(get_logs_file, uid_label, logs, every=0.5)
-    gr.on(triggers=[label.change for label in column_mappings],
         fn=write_column_mapping_to_config,
-        inputs=[dataset_id_input, dataset_config_input, dataset_split_input, *column_mappings])
-    gr.on(triggers=[model_id_input.change, dataset_config_input.change, dataset_split_input.change],
         fn=check_model_and_show_prediction,
-        inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
-        outputs=[example_input, example_prediction, column_mapping_accordion, *column_mappings])
-    dataset_id_input.blur(check_dataset_and_get_config, dataset_id_input, dataset_config_input)
     dataset_config_input.change(
-        check_dataset_and_get_split,
-        inputs=[dataset_id_input, dataset_config_input],
-        outputs=[dataset_split_input])
-    scanners.change(
-        write_scanners,
-        inputs=scanners
     )
-    run_inference.change(
-        write_inference_type,
-        inputs=[run_inference]
-    )
     gr.on(
         triggers=[
             run_btn.click,
-            ],
         fn=try_submit,
         inputs=[
-            model_id_input,
-            dataset_id_input,
-            dataset_config_input,
-            dataset_split_input,
             run_local,
-            uid_label],
-        outputs=[run_btn, logs])
     def enable_run_btn():
-        return (gr.update(interactive=True))
     gr.on(
         triggers=[
-                model_id_input.change,
-                dataset_config_input.change,
-                dataset_split_input.change,
-                run_inference.change,
-                run_local.change,
-                scanners.change],
         fn=enable_run_btn,
         inputs=None,
-        outputs=[run_btn])
     gr.on(
         triggers=[label.change for label in column_mappings],
         fn=enable_run_btn,
         inputs=None,
-        outputs=[run_btn])

 import gradio as gr
 import uuid
+from io_utils import (
+    read_scanners,
+    write_scanners,
+    read_inference_type,
+    write_inference_type,
+    get_logs_file,
+)
 from wordings import INTRODUCTION_MD, CONFIRM_MAPPING_DETAILS_MD
+from text_classification_ui_helpers import (
+    try_submit,
+    check_dataset_and_get_config,
+    check_dataset_and_get_split,
+    check_model_and_show_prediction,
+    write_column_mapping_to_config,
+)
 MAX_LABELS = 20
 MAX_FEATURES = 20
+EXAMPLE_MODEL_ID = "cardiffnlp/twitter-roberta-base-sentiment-latest"
+EXAMPLE_DATA_ID = "tweet_eval"
+CONFIG_PATH = "./config.yaml"
 def get_demo(demo):
     with gr.Row():
             label="Hugging Face Dataset id",
             placeholder=EXAMPLE_DATA_ID + " (press enter to confirm)",
         )
     with gr.Row():
+        dataset_config_input = gr.Dropdown(label="Dataset Config", visible=False)
+        dataset_split_input = gr.Dropdown(label="Dataset Split", visible=False)
     with gr.Row():
+        example_input = gr.Markdown("Example Input", visible=False)
     with gr.Row():
+        example_prediction = gr.Label(label="Model Prediction Sample", visible=False)
     with gr.Row():
+        with gr.Accordion(
+            label="Label and Feature Mapping", visible=False, open=False
+        ) as column_mapping_accordion:
             with gr.Row():
                 gr.Markdown(CONFIRM_MAPPING_DETAILS_MD)
             column_mappings = []
                 with gr.Column():
                     for _ in range(MAX_LABELS):
                         column_mappings.append(gr.Dropdown(visible=False))
+                with gr.Column():
                     for _ in range(MAX_LABELS, MAX_LABELS + MAX_FEATURES):
                         column_mappings.append(gr.Dropdown(visible=False))
+    with gr.Accordion(label="Model Wrap Advance Config (optional)", open=False):
         run_local = gr.Checkbox(value=True, label="Run in this Space")
+        use_inference = read_inference_type(CONFIG_PATH) == "hf_inference_api"
         run_inference = gr.Checkbox(value=use_inference, label="Run with Inference API")
+    with gr.Accordion(label="Scanner Advance Config (optional)", open=False):
+        selected = read_scanners(CONFIG_PATH)
         # currently we remove data_leakage from the default scanners
         # Reason: data_leakage barely raises any issues and takes too many requests
         # when using inference API, causing rate limit error
+        scan_config = selected + ["data_leakage"]
+        scanners = gr.CheckboxGroup(
+            choices=scan_config, value=selected, label="Scan Settings", visible=True
+        )
     with gr.Row():
         run_btn = gr.Button(
             interactive=True,
             size="lg",
         )
     with gr.Row():
         uid = uuid.uuid4()
+        uid_label = gr.Textbox(
+            label="Evaluation ID:", value=uid, visible=False, interactive=False
+        )
         logs = gr.Textbox(label="Giskard Bot Evaluation Log:", visible=False)
         demo.load(get_logs_file, uid_label, logs, every=0.5)
+    gr.on(
+        triggers=[label.change for label in column_mappings],
         fn=write_column_mapping_to_config,
+        inputs=[
+            dataset_id_input,
+            dataset_config_input,
+            dataset_split_input,
+            *column_mappings,
+        ],
+    )
+    gr.on(
+        triggers=[
+            model_id_input.change,
+            dataset_config_input.change,
+            dataset_split_input.change,
+        ],
         fn=check_model_and_show_prediction,
+        inputs=[
+            model_id_input,
+            dataset_id_input,
+            dataset_config_input,
+            dataset_split_input,
+        ],
+        outputs=[
+            example_input,
+            example_prediction,
+            column_mapping_accordion,
+            *column_mappings,
+        ],
+    )
+    dataset_id_input.blur(
+        check_dataset_and_get_config, dataset_id_input, dataset_config_input
+    )
     dataset_config_input.change(
+        check_dataset_and_get_split,
+        inputs=[dataset_id_input, dataset_config_input],
+        outputs=[dataset_split_input],
     )
+    scanners.change(write_scanners, inputs=scanners)
+    run_inference.change(write_inference_type, inputs=[run_inference])
     gr.on(
         triggers=[
             run_btn.click,
+        ],
         fn=try_submit,
         inputs=[
+            model_id_input,
+            dataset_id_input,
+            dataset_config_input,
+            dataset_split_input,
             run_local,
+            uid_label,
+        ],
+        outputs=[run_btn, logs],
+    )
     def enable_run_btn():
+        return gr.update(interactive=True)
     gr.on(
         triggers=[
+            model_id_input.change,
+            dataset_config_input.change,
+            dataset_split_input.change,
+            run_inference.change,
+            run_local.change,
+            scanners.change,
+        ],
         fn=enable_run_btn,
         inputs=None,
+        outputs=[run_btn],
+    )
     gr.on(
         triggers=[label.change for label in column_mappings],
         fn=enable_run_btn,
         inputs=None,
+        outputs=[run_btn],
+    )

fetch_utils.py CHANGED Viewed

@@ -1,6 +1,8 @@
-import datasets
 import logging
 def check_dataset_and_get_config(dataset_id):
     try:
         configs = datasets.get_dataset_config_names(dataset_id)
@@ -9,17 +11,22 @@ def check_dataset_and_get_config(dataset_id):
         # Dataset may not exist
         return None
 def check_dataset_and_get_split(dataset_id, dataset_config):
     try:
         ds = datasets.load_dataset(dataset_id, dataset_config)
     except Exception as e:
         # Dataset may not exist
-        logging.warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
         return None
     try:
         splits = list(ds.keys())
         return splits
     except Exception as e:
         # Dataset has no splits
-        logging.warning(f"Dataset {dataset_id} with config {dataset_config} has no splits: {e}")
-        return None

 import logging
+import datasets
 def check_dataset_and_get_config(dataset_id):
     try:
         configs = datasets.get_dataset_config_names(dataset_id)
         # Dataset may not exist
         return None
 def check_dataset_and_get_split(dataset_id, dataset_config):
     """Return the split names of a dataset configuration, or ``None``.

     Args:
         dataset_id: Dataset identifier handed to the ``datasets`` library.
         dataset_config: Name of the dataset configuration to inspect.

     Returns:
         A list of split names, or ``None`` when the lookup fails (a warning
         is logged in that case).
     """
     try:
         # Only the split names are needed, so ask for them directly instead
         # of downloading and preparing every split with ``load_dataset``.
         return list(datasets.get_dataset_split_names(dataset_id, dataset_config))
     except Exception as e:
         # Dataset may not exist
         logging.warning(
             f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}"
         )
         return None

io_utils.py CHANGED Viewed

@@ -1,14 +1,17 @@
-import yaml
-import subprocess
 import os
 YAML_PATH = "./config.yaml"
 PIPE_PATH = "./tmp/pipe"
 class Dumper(yaml.Dumper):
     def increase_indent(self, flow=False, *args, **kwargs):
         return super().increase_indent(flow=flow, indentless=False)
 # read scanners from yaml file
 # return a list of scanners
 def read_scanners(path):
@@ -18,6 +21,7 @@ def read_scanners(path):
         scanners = config.get("detectors", [])
     return scanners
 # convert a list of scanners to yaml file
 def write_scanners(scanners):
     print(scanners)
@@ -28,6 +32,7 @@ def write_scanners(scanners):
             # save scanners to detectors in yaml
             yaml.dump(config, f, Dumper=Dumper)
 # read model_type from yaml file
 def read_inference_type(path):
     inference_type = ""
@@ -36,17 +41,19 @@ def read_inference_type(path):
         inference_type = config.get("inference_type", "")
     return inference_type
 # write model_type to yaml file
 def write_inference_type(use_inference):
     with open(YAML_PATH, "r+") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
     if use_inference:
-        config["inference_type"] = 'hf_inference_api'
     else:
-        config["inference_type"] = 'hf_pipeline'
     # save inference_type to inference_type in yaml
     yaml.dump(config, f, Dumper=Dumper)
 # read column mapping from yaml file
 def read_column_mapping(path):
     column_mapping = {}
@@ -56,6 +63,7 @@ def read_column_mapping(path):
             column_mapping = config.get("column_mapping", dict())
     return column_mapping
 # write column mapping to yaml file
 def write_column_mapping(mapping):
     with open(YAML_PATH, "r") as f:
@@ -70,6 +78,7 @@ def write_column_mapping(mapping):
         # save column_mapping to column_mapping in yaml
         yaml.dump(config, f, Dumper=Dumper)
 # convert column mapping dataframe to json
 def convert_column_mapping_to_json(df, label=""):
     column_mapping = {}
@@ -78,6 +87,7 @@ def convert_column_mapping_to_json(df, label=""):
         column_mapping[label].append(row.tolist())
     return column_mapping
 def get_logs_file(uid):
     try:
         file = open(f"./tmp/{uid}_log", "r")
@@ -85,20 +95,23 @@ def get_logs_file(uid):
     except Exception:
         return "Log file does not exist"
 def write_log_to_user_file(id, log):
     with open(f"./tmp/{id}_log", "a") as f:
         f.write(log)
 def save_job_to_pipe(id, job, lock):
-    if not os.path.exists('./tmp'):
-        os.makedirs('./tmp')
     job = [str(i) for i in job]
     job = ",".join(job)
     print(job)
     with lock:
         with open(PIPE_PATH, "a") as f:
             # write each element in job
-            f.write(f'{id}@{job}\n')
 def pop_job_from_pipe():
     if not os.path.exists(PIPE_PATH):
@@ -113,7 +126,7 @@ def pop_job_from_pipe():
         f.close()
     if len(job) == 0:
         return
-    job_info = job.split('\n')[0].split("@")
     if len(job_info) != 2:
         raise ValueError("Invalid job info: ", job_info)

 import os
+import subprocess
+import yaml
 YAML_PATH = "./config.yaml"
 PIPE_PATH = "./tmp/pipe"
 class Dumper(yaml.Dumper):
     # yaml.Dumper subclass passed as Dumper= to the yaml.dump calls visible
     # in this file.
     def increase_indent(self, flow=False, *args, **kwargs):
         # Any extra positional/keyword arguments are discarded and the parent
         # is always called with indentless=False.
         return super().increase_indent(flow=flow, indentless=False)
 # read scanners from yaml file
 # return a list of scanners
 def read_scanners(path):
         scanners = config.get("detectors", [])
     return scanners
 # convert a list of scanners to yaml file
 def write_scanners(scanners):
     print(scanners)
             # save scanners to detectors in yaml
             yaml.dump(config, f, Dumper=Dumper)
 # read inference_type from yaml file
 def read_inference_type(path):
     inference_type = ""
         inference_type = config.get("inference_type", "")
     return inference_type
 # write inference_type to yaml file
 def write_inference_type(use_inference):
     """Persist the selected inference type to the YAML config file.

     Stores ``"hf_inference_api"`` under the ``inference_type`` key when
     *use_inference* is truthy and ``"hf_pipeline"`` otherwise. Every other
     key already present in the file is written back unchanged.

     Args:
         use_inference: Truthy to store ``"hf_inference_api"``.
     """
     with open(YAML_PATH, "r") as f:
         # An empty file parses to None; fall back to an empty mapping.
         config = yaml.load(f, Loader=yaml.FullLoader) or {}

     config["inference_type"] = "hf_inference_api" if use_inference else "hf_pipeline"

     # Reopen in write mode: the read handle is closed once its ``with`` block
     # ends, and dumping through an "r+" handle positioned at end-of-file would
     # append after the old content instead of replacing it.
     with open(YAML_PATH, "w") as f:
         yaml.dump(config, f, Dumper=Dumper)
 # read column mapping from yaml file
 def read_column_mapping(path):
     column_mapping = {}
             column_mapping = config.get("column_mapping", dict())
     return column_mapping
 # write column mapping to yaml file
 def write_column_mapping(mapping):
     with open(YAML_PATH, "r") as f:
         # save column_mapping to column_mapping in yaml
         yaml.dump(config, f, Dumper=Dumper)
 # convert column mapping dataframe to json
 def convert_column_mapping_to_json(df, label=""):
     column_mapping = {}
         column_mapping[label].append(row.tolist())
     return column_mapping
 def get_logs_file(uid):
     try:
         file = open(f"./tmp/{uid}_log", "r")
     except Exception:
         return "Log file does not exist"
 def write_log_to_user_file(id, log):
     """Append *log* to the file ``./tmp/<id>_log``.

     The file is opened in append mode, so repeated calls accumulate text.
     """
     log_path = f"./tmp/{id}_log"
     log_file = open(log_path, "a")
     try:
         log_file.write(log)
     finally:
         log_file.close()
 def save_job_to_pipe(id, job, lock):
     """Append one job record to the pipe file at ``PIPE_PATH``.

     The record is a single line ``<id>@<fields>`` where the fields are the
     string forms of the items of *job* joined with commas.

     Args:
         id: Identifier written before the ``@`` separator.
         job: Iterable of job fields; each one is converted with ``str``.
         lock: Context manager held while the pipe file is appended to.
     """
     # exist_ok avoids the check-then-create race of a separate exists() test.
     os.makedirs("./tmp", exist_ok=True)
     job = ",".join(str(i) for i in job)
     print(job)
     with lock:
         with open(PIPE_PATH, "a") as f:
             # One record per line: "<id>@<comma-separated fields>"
             f.write(f"{id}@{job}\n")
 def pop_job_from_pipe():
     if not os.path.exists(PIPE_PATH):
         f.close()
     if len(job) == 0:
         return
+    job_info = job.split("\n")[0].split("@")
     if len(job_info) != 2:
         raise ValueError("Invalid job info: ", job_info)

mlflow_test.py ADDED Viewed

	@@ -0,0 +1,20 @@

+from pathlib import Path
+from mlflow.utils.environment import _PythonEnv
+from mlflow.utils.virtualenv import (
+    _PYENV_ROOT_DIR,
+    _VIRTUALENV_ENVS_DIR,
+    _create_virtualenv,
+    _get_mlflow_virtualenv_root,
+    _get_virtualenv_extra_env_vars,
+    _get_virtualenv_name,
+    _install_python,
+)
+_create_virtualenv(
+    "/Users/inoki/giskard-home/projects/credit/models/2a2b6a9c-4050-4bb6-9024-00bf15651262",
+    Path("/opt/homebrew/bin/python3.10"),
+    Path("/Users/inoki/giskard-home/mlflow-venv1"),
+    _PythonEnv()
+)

run_jobs.py CHANGED Viewed

@@ -1,11 +1,13 @@
-from io_utils import pop_job_from_pipe
-import time
 import threading
 def start_process_run_job():
     try:
         print("Running jobs in thread")
-        global thread
         thread = threading.Thread(target=run_job)
         thread.daemon = True
         thread.do_run = True
@@ -13,11 +15,14 @@ def start_process_run_job():
     except Exception as e:
         print("Failed to start thread: ", e)
 def stop_thread():
     print("Stop thread")
     thread.do_run = False
-def run_job():
     while True:
         print(thread.do_run)
         try:
@@ -26,4 +31,4 @@ def run_job():
         except KeyboardInterrupt:
             print("KeyboardInterrupt stop background thread")
             stop_thread()
-            break

 import threading
+import time
+from io_utils import pop_job_from_pipe
 def start_process_run_job():
     try:
         print("Running jobs in thread")
+        global thread
         thread = threading.Thread(target=run_job)
         thread.daemon = True
         thread.do_run = True
     except Exception as e:
         print("Failed to start thread: ", e)
 def stop_thread():
     """Clear the ``do_run`` flag on the module-level ``thread`` object.

     ``thread`` is the global assigned in ``start_process_run_job``; calling
     this before that assignment raises ``NameError``.
     NOTE(review): presumably the worker loop polls ``do_run`` to exit - the
     loop body is not fully visible here, confirm.
     """
     print("Stop thread")
     thread.do_run = False
+def run_job():
     while True:
         print(thread.do_run)
         try:
         except KeyboardInterrupt:
             print("KeyboardInterrupt stop background thread")
             stop_thread()
+            break

text_classification.py CHANGED Viewed

@@ -1,10 +1,12 @@
-import datasets
-import logging
 import json
-import pandas as pd
 import huggingface_hub
 from transformers import pipeline
 def get_labels_and_features_from_dataset(dataset_id, dataset_config, split):
     try:
         ds = datasets.load_dataset(dataset_id, dataset_config)[split]
@@ -13,9 +15,12 @@ def get_labels_and_features_from_dataset(dataset_id, dataset_config, split):
         features = [f for f in dataset_features.keys() if f != "label"]
         return labels, features
     except Exception as e:
-        logging.warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
         return None, None
 def check_model(model_id):
     try:
         task = huggingface_hub.model_info(model_id).pipeline_tag
@@ -28,7 +33,7 @@ def check_model(model_id):
         return ppl
     except Exception:
         return None
 def text_classificaiton_match_label_case_unsensative(id2label_mapping, label):
     for model_label in id2label_mapping.keys():
@@ -45,7 +50,7 @@ def text_classification_map_model_and_dataset_labels(id2label, dataset_features)
             continue
         if len(feature.names) != len(id2label_mapping.keys()):
             continue
         dataset_labels = feature.names
         # Try to match labels
         for label in feature.names:
@@ -53,7 +58,9 @@ def text_classification_map_model_and_dataset_labels(id2label, dataset_features)
                 model_label = label
             else:
                 # Try to find a case-insensitive match
-                model_label, label = text_classificaiton_match_label_case_unsensative(id2label_mapping, label)
             if model_label is not None:
                 id2label_mapping[model_label] = label
             else:
@@ -61,7 +68,8 @@ def text_classification_map_model_and_dataset_labels(id2label, dataset_features)
     return id2label_mapping, dataset_labels
-'''
 params:
     column_mapping: dict
     example: {
@@ -72,7 +80,9 @@ params:
         }
     }
     ppl: pipeline
-'''
 def check_column_mapping_keys_validity(column_mapping, ppl):
     # get the element in all the list elements
     column_mapping = json.loads(column_mapping)
@@ -83,10 +93,11 @@ def check_column_mapping_keys_validity(column_mapping, ppl):
     id2label = ppl.model.config.id2label
     original_labels = set(id2label.values())
     return user_labels == model_labels == original_labels
-'''
 params:
     column_mapping: dict
     dataset_features: dict
@@ -94,7 +105,9 @@ params:
         'text': Value(dtype='string', id=None),
         'label': ClassLabel(names=['negative', 'neutral', 'positive'], id=None)
     }
-'''
 def infer_text_input_column(column_mapping, dataset_features):
     # Check whether we need to infer the text input column
     infer_text_input_column = True
@@ -109,18 +122,20 @@ def infer_text_input_column(column_mapping, dataset_features):
     if infer_text_input_column:
         # Try to retrieve one
-        candidates = [f for f in dataset_features if dataset_features[f].dtype == "string"]
-        feature_map_df = pd.DataFrame({
-            "Dataset Features": [candidates[0]],
-            "Model Input Features": ["text"]
-        })
         if len(candidates) > 0:
             logging.debug(f"Candidates are {candidates}")
             column_mapping["text"] = candidates[0]
     return column_mapping, feature_map_df
-'''
 params:
     column_mapping: dict
     id2label_mapping: dict
@@ -130,8 +145,12 @@ params:
         'neutral': 'neutral',
         'positive': 'positive'
         }
-'''
-def infer_output_label_column(column_mapping, id2label_mapping, id2label, dataset_labels):
     # Check whether we need to infer the output label column
     if "data" in column_mapping.keys():
         if isinstance(column_mapping["data"], list):
@@ -139,25 +158,29 @@ def infer_output_label_column(column_mapping, id2label_mapping, id2label, datase
             for user_label, model_label in column_mapping["data"]:
                 id2label_mapping[model_label] = user_label
     elif None in id2label_mapping.values():
-        column_mapping["label"] = {
-            i: None for i in id2label.keys()
-        }
         return column_mapping, None
     if "data" not in column_mapping.keys():
         # Column mapping should contain original model labels
         column_mapping["label"] = {
-            str(i): id2label_mapping[label] for i, label in zip(id2label.keys(), dataset_labels)
         }
     # print('>>>>> column_mapping >>>>>', column_mapping)
-    id2label_df = pd.DataFrame({
-        "Dataset Labels": dataset_labels,
-        "Model Prediction Labels": [id2label_mapping[label] for label in dataset_labels],
-    })
     return column_mapping, id2label_df
 def check_dataset_features_validity(d_id, config, split):
     # We assume dataset is ok here
     ds = datasets.load_dataset(d_id, config)[split]
@@ -171,6 +194,7 @@ def check_dataset_features_validity(d_id, config, split):
     return df, dataset_features
 def get_example_prediction(ppl, dataset_id, dataset_config, dataset_split):
     # get a sample prediction from the model on the dataset
     prediction_input = None
@@ -184,7 +208,7 @@ def get_example_prediction(ppl, dataset_id, dataset_config, dataset_split):
         else:
             prediction_input = ds[0]["text"]
-        print('prediction_input', prediction_input)
         results = ppl(prediction_input, top_k=None)
         # Display results in original label and mapped label
         prediction_result = {
@@ -193,7 +217,6 @@ def get_example_prediction(ppl, dataset_id, dataset_config, dataset_split):
     except Exception:
         # Pipeline prediction failed, need to provide labels
         return prediction_input, None
     return prediction_input, prediction_result
@@ -212,37 +235,55 @@ def get_sample_prediction(ppl, df, column_mapping, id2label_mapping):
     except Exception:
         # Pipeline prediction failed, need to provide labels
         return prediction_input, None
     # Display results in original label and mapped label
     prediction_result = {
-        f'{result["label"]}(original) - {id2label_mapping[result["label"]]}(mapped)': result["score"] for result in results
     }
     return prediction_input, prediction_result
 def text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, split):
     # load dataset as pd DataFrame
     # get features column from dataset
     df, dataset_features = check_dataset_features_validity(d_id, config, split)
-    column_mapping, feature_map_df = infer_text_input_column(column_mapping, dataset_features)
     if feature_map_df is None:
         # dataset does not have any features
-        return None, None, None, None, None
     # Retrieve all labels
     id2label = ppl.model.config.id2label
     # Infer labels
-    id2label_mapping, dataset_labels = text_classification_map_model_and_dataset_labels(id2label, dataset_features)
-    column_mapping, id2label_df = infer_output_label_column(column_mapping, id2label_mapping, id2label, dataset_labels)
     if id2label_df is None:
         # does not able to infer output label column
         return column_mapping, None, None, None, feature_map_df
     # Get a sample prediction
-    prediction_input, prediction_result = get_sample_prediction(ppl, df, column_mapping, id2label_mapping)
     if prediction_result is None:
         # does not able to get a sample prediction
         return column_mapping, prediction_input, None, id2label_df, feature_map_df
-    return column_mapping, prediction_input, prediction_result, id2label_df, feature_map_df

 import json
+import logging
+import datasets
 import huggingface_hub
+import pandas as pd
 from transformers import pipeline
 def get_labels_and_features_from_dataset(dataset_id, dataset_config, split):
     try:
         ds = datasets.load_dataset(dataset_id, dataset_config)[split]
         features = [f for f in dataset_features.keys() if f != "label"]
         return labels, features
     except Exception as e:
+        logging.warning(
+            f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}"
+        )
         return None, None
 def check_model(model_id):
     try:
         task = huggingface_hub.model_info(model_id).pipeline_tag
         return ppl
     except Exception:
         return None
 def text_classificaiton_match_label_case_unsensative(id2label_mapping, label):
     for model_label in id2label_mapping.keys():
             continue
         if len(feature.names) != len(id2label_mapping.keys()):
             continue
         dataset_labels = feature.names
         # Try to match labels
         for label in feature.names:
                 model_label = label
             else:
                 # Try to find case unsensative
+                model_label, label = text_classificaiton_match_label_case_unsensative(
+                    id2label_mapping, label
+                )
             if model_label is not None:
                 id2label_mapping[model_label] = label
             else:
     return id2label_mapping, dataset_labels
+"""
 params:
     column_mapping: dict
     example: {
         }
     }
     ppl: pipeline
+"""
 def check_column_mapping_keys_validity(column_mapping, ppl):
     # get the element in all the list elements
     column_mapping = json.loads(column_mapping)
     id2label = ppl.model.config.id2label
     original_labels = set(id2label.values())
     return user_labels == model_labels == original_labels
+"""
 params:
     column_mapping: dict
     dataset_features: dict
         'text': Value(dtype='string', id=None),
         'label': ClassLabel(names=['negative', 'neutral', 'positive'], id=None)
     }
+"""
 def infer_text_input_column(column_mapping, dataset_features):
     # Check whether we need to infer the text input column
     infer_text_input_column = True
     if infer_text_input_column:
         # Try to retrieve one
+        candidates = [
+            f for f in dataset_features if dataset_features[f].dtype == "string"
+        ]
+        feature_map_df = pd.DataFrame(
+            {"Dataset Features": [candidates[0]], "Model Input Features": ["text"]}
+        )
         if len(candidates) > 0:
             logging.debug(f"Candidates are {candidates}")
             column_mapping["text"] = candidates[0]
     return column_mapping, feature_map_df
+"""
 params:
     column_mapping: dict
     id2label_mapping: dict
         'neutral': 'neutral',
         'positive': 'positive'
         }
+"""
+def infer_output_label_column(
+    column_mapping, id2label_mapping, id2label, dataset_labels
+):
     # Check whether we need to infer the output label column
     if "data" in column_mapping.keys():
         if isinstance(column_mapping["data"], list):
             for user_label, model_label in column_mapping["data"]:
                 id2label_mapping[model_label] = user_label
     elif None in id2label_mapping.values():
+        column_mapping["label"] = {i: None for i in id2label.keys()}
         return column_mapping, None
     if "data" not in column_mapping.keys():
         # Column mapping should contain original model labels
         column_mapping["label"] = {
+            str(i): id2label_mapping[label]
+            for i, label in zip(id2label.keys(), dataset_labels)
         }
     # print('>>>>> column_mapping >>>>>', column_mapping)
+    id2label_df = pd.DataFrame(
+        {
+            "Dataset Labels": dataset_labels,
+            "Model Prediction Labels": [
+                id2label_mapping[label] for label in dataset_labels
+            ],
+        }
+    )
     return column_mapping, id2label_df
 def check_dataset_features_validity(d_id, config, split):
     # We assume dataset is ok here
     ds = datasets.load_dataset(d_id, config)[split]
     return df, dataset_features
 def get_example_prediction(ppl, dataset_id, dataset_config, dataset_split):
     # get a sample prediction from the model on the dataset
     prediction_input = None
         else:
             prediction_input = ds[0]["text"]
+        print("prediction_input", prediction_input)
         results = ppl(prediction_input, top_k=None)
         # Display results in original label and mapped label
         prediction_result = {
     except Exception:
         # Pipeline prediction failed, need to provide labels
         return prediction_input, None
     return prediction_input, prediction_result
     except Exception:
         # Pipeline prediction failed, need to provide labels
         return prediction_input, None
     # Display results in original label and mapped label
     prediction_result = {
+        f'{result["label"]}(original) - {id2label_mapping[result["label"]]}(mapped)': result[
+            "score"
+        ]
+        for result in results
     }
     return prediction_input, prediction_result
 def text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, split):  # returns a 5-tuple: (column_mapping, prediction_input, prediction_result, id2label_df, feature_map_df)
     # load dataset as pd DataFrame
     # get features column from dataset
     df, dataset_features = check_dataset_features_validity(d_id, config, split)
+    column_mapping, feature_map_df = infer_text_input_column(
+        column_mapping, dataset_features
+    )
     if feature_map_df is None:  # NOTE(review): confirm infer_text_input_column can actually return None for feature_map_df
         # dataset does not have any features
+        return None, None, None, None, None  # all five slots are None in this case, including column_mapping
     # Retrieve all labels
     id2label = ppl.model.config.id2label  # label table read from the pipeline's model config
     # Infer labels
+    id2label_mapping, dataset_labels = text_classification_map_model_and_dataset_labels(
+        id2label, dataset_features
+    )
+    column_mapping, id2label_df = infer_output_label_column(
+        column_mapping, id2label_mapping, id2label, dataset_labels
+    )
     if id2label_df is None:
         # does not able to infer output label column
         return column_mapping, None, None, None, feature_map_df  # partial result: only the mapping and the feature table are filled
     # Get a sample prediction
+    prediction_input, prediction_result = get_sample_prediction(
+        ppl, df, column_mapping, id2label_mapping
+    )
     if prediction_result is None:
         # does not able to get a sample prediction
         return column_mapping, prediction_input, None, id2label_df, feature_map_df  # prediction slot stays None, everything else is returned
+    return (  # full result: every step succeeded
+        column_mapping,
+        prediction_input,
+        prediction_result,
+        id2label_df,
+        feature_map_df,
+    )

text_classification_ui_helpers.py CHANGED Viewed

@@ -1,23 +1,35 @@
-import gradio as gr
-from wordings import CONFIRM_MAPPING_DETAILS_FAIL_RAW
 import json
-import os
 import logging
 import threading
-from io_utils import read_column_mapping, write_column_mapping, save_job_to_pipe, write_log_to_user_file
 import datasets
-import collections
-from text_classification import get_labels_and_features_from_dataset, check_model, get_example_prediction
 from transformers.pipelines import TextClassificationPipeline
 MAX_LABELS = 20
 MAX_FEATURES = 20
-HF_REPO_ID = 'HF_REPO_ID'
-HF_SPACE_ID = 'SPACE_ID'
-HF_WRITE_TOKEN = 'HF_WRITE_TOKEN'
 CONFIG_PATH = "./config.yaml"
 def check_dataset_and_get_config(dataset_id):
     try:
         write_column_mapping(None)
@@ -27,6 +39,7 @@ def check_dataset_and_get_config(dataset_id):
         # Dataset may not exist
         pass
 def check_dataset_and_get_split(dataset_id, dataset_config):
     try:
         splits = list(datasets.load_dataset(dataset_id, dataset_config).keys())
@@ -36,8 +49,11 @@ def check_dataset_and_get_split(dataset_id, dataset_config):
         # gr.Warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
         pass
 def write_column_mapping_to_config(dataset_id, dataset_config, dataset_split, *labels):
-    ds_labels, ds_features = get_labels_and_features_from_dataset(dataset_id, dataset_config, dataset_split)
     if labels is None:
         return
     labels = [*labels]
@@ -54,45 +70,73 @@ def write_column_mapping_to_config(dataset_id, dataset_config, dataset_split, *l
     if "features" not in all_mappings.keys():
         all_mappings["features"] = dict()
-    for i, feat in enumerate(labels[MAX_LABELS:(MAX_LABELS + MAX_FEATURES)]):
         if feat:
             all_mappings["features"][feat] = ds_features[i]
     write_column_mapping(all_mappings)
 def list_labels_and_features_from_dataset(ds_labels, ds_features, model_id2label):
     model_labels = list(model_id2label.values())
     len_model_labels = len(model_labels)
-    print(model_labels, model_id2label, 3%len_model_labels)
-    lables = [gr.Dropdown(label=f"{label}", choices=model_labels, value=model_id2label[i%len_model_labels], interactive=True, visible=True) for i, label in enumerate(ds_labels[:MAX_LABELS])]
     lables += [gr.Dropdown(visible=False) for _ in range(MAX_LABELS - len(lables))]
     # TODO: Substitute 'text' with more features for zero-shot
-    features = [gr.Dropdown(label=f"{feature}", choices=ds_features, value=ds_features[0], interactive=True, visible=True) for feature in ['text']]
-    features += [gr.Dropdown(visible=False) for _ in range(MAX_FEATURES - len(features))]
     return lables + features
-def check_model_and_show_prediction(model_id, dataset_id, dataset_config, dataset_split):
     ppl = check_model(model_id)
     if ppl is None or not isinstance(ppl, TextClassificationPipeline):
         gr.Warning("Please check your model.")
         return (
             gr.update(visible=False),
             gr.update(visible=False),
-            *[gr.update(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)]
         )
-    dropdown_placement = [gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)]
-    if ppl is None: # pipeline not found
         gr.Warning("Model not found")
         return (
             gr.update(visible=False),
             gr.update(visible=False),
             gr.update(visible=False, open=False),
-            *dropdown_placement
         )
     model_id2label = ppl.model.config.id2label
-    ds_labels, ds_features = get_labels_and_features_from_dataset(dataset_id, dataset_config, dataset_split)
     # when dataset does not have labels or features
     if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
         # gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
@@ -100,9 +144,9 @@ def check_model_and_show_prediction(model_id, dataset_id, dataset_config, datase
             gr.update(visible=False),
             gr.update(visible=False),
             gr.update(visible=False, open=False),
-            *dropdown_placement
         )
     column_mappings = list_labels_and_features_from_dataset(
         ds_labels,
         ds_features,
@@ -111,23 +155,29 @@ def check_model_and_show_prediction(model_id, dataset_id, dataset_config, datase
     # when labels or features are not aligned
     # show manually column mapping
-    if collections.Counter(model_id2label.values()) != collections.Counter(ds_labels) or ds_features[0] != 'text':
         gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
         return (
             gr.update(visible=False),
             gr.update(visible=False),
             gr.update(visible=True, open=True),
-            *column_mappings
         )
-    prediction_input, prediction_output = get_example_prediction(ppl, dataset_id, dataset_config, dataset_split)
     return (
         gr.update(value=prediction_input, visible=True),
         gr.update(value=prediction_output, visible=True),
         gr.update(visible=True, open=False),
-        *column_mappings
     )
 def try_submit(m_id, d_id, config, split, local, uid):
     all_mappings = read_column_mapping(CONFIG_PATH)
@@ -139,7 +189,7 @@ def try_submit(m_id, d_id, config, split, local, uid):
         gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
         return (gr.update(interactive=True), gr.update(visible=False))
     label_mapping = all_mappings["labels"]
     if "features" not in all_mappings.keys():
         gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
         return (gr.update(interactive=True), gr.update(visible=False))
@@ -150,32 +200,47 @@ def try_submit(m_id, d_id, config, split, local, uid):
         command = [
             "python",
             "cli.py",
-            "--loader", "huggingface",
-            "--model", m_id,
-            "--dataset", d_id,
-            "--dataset_config", config,
-            "--dataset_split", split,
-            "--hf_token", os.environ.get(HF_WRITE_TOKEN),
-            "--discussion_repo", os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID),
-            "--output_format", "markdown",
-            "--output_portal", "huggingface",
-            "--feature_mapping", json.dumps(feature_mapping),
-            "--label_mapping", json.dumps(label_mapping),
-            "--scan_config", "../config.yaml",
         ]
         eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
         logging.info(f"Start local evaluation on {eval_str}")
         save_job_to_pipe(uid, command, threading.Lock())
-        write_log_to_user_file(uid, f"Start local evaluation on {eval_str}. Please wait for your job to start...\n")
         gr.Info(f"Start local evaluation on {eval_str}")
         return (
             gr.update(interactive=False),
-            gr.update(lines=5, visible=True, interactive=False))
     else:
         gr.Info("TODO: Submit task to an endpoint")
-    return (gr.update(interactive=True),  # Submit button
-            gr.update(visible=False))

+import collections
 import json
 import logging
+import os
 import threading
 import datasets
+import gradio as gr
 from transformers.pipelines import TextClassificationPipeline
+from io_utils import (
+    read_column_mapping,
+    save_job_to_pipe,
+    write_column_mapping,
+    write_log_to_user_file,
+)
+from text_classification import (
+    check_model,
+    get_example_prediction,
+    get_labels_and_features_from_dataset,
+)
+from wordings import CONFIRM_MAPPING_DETAILS_FAIL_RAW
 MAX_LABELS = 20
 MAX_FEATURES = 20
+HF_REPO_ID = "HF_REPO_ID"
+HF_SPACE_ID = "SPACE_ID"
+HF_WRITE_TOKEN = "HF_WRITE_TOKEN"
 CONFIG_PATH = "./config.yaml"
 def check_dataset_and_get_config(dataset_id):
     try:
         write_column_mapping(None)
         # Dataset may not exist
         pass
 def check_dataset_and_get_split(dataset_id, dataset_config):
     try:
         splits = list(datasets.load_dataset(dataset_id, dataset_config).keys())
         # gr.Warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
         pass
 def write_column_mapping_to_config(dataset_id, dataset_config, dataset_split, *labels):
+    ds_labels, ds_features = get_labels_and_features_from_dataset(
+        dataset_id, dataset_config, dataset_split
+    )
     if labels is None:
         return
     labels = [*labels]
     if "features" not in all_mappings.keys():
         all_mappings["features"] = dict()
+    for i, feat in enumerate(labels[MAX_LABELS : (MAX_LABELS + MAX_FEATURES)]):
         if feat:
             all_mappings["features"][feat] = ds_features[i]
     write_column_mapping(all_mappings)
 def list_labels_and_features_from_dataset(ds_labels, ds_features, model_id2label):  # builds a flat list of MAX_LABELS label dropdowns followed by MAX_FEATURES feature dropdowns
     model_labels = list(model_id2label.values())
     len_model_labels = len(model_labels)
+    print(model_labels, model_id2label, 3 % len_model_labels)  # NOTE(review): looks like leftover debug output; 3 % 0 raises ZeroDivisionError when model_id2label is empty
+    lables = [  # one visible dropdown per dataset label, capped at MAX_LABELS
+        gr.Dropdown(
+            label=f"{label}",
+            choices=model_labels,
+            value=model_id2label[i % len_model_labels],  # default wraps around the model labels; assumes id2label is keyed by ints 0..n-1 — TODO confirm
+            interactive=True,
+            visible=True,
+        )
+        for i, label in enumerate(ds_labels[:MAX_LABELS])
+    ]
     lables += [gr.Dropdown(visible=False) for _ in range(MAX_LABELS - len(lables))]  # pad with hidden dropdowns so the label section always has MAX_LABELS entries
     # TODO: Substitute 'text' with more features for zero-shot
+    features = [
+        gr.Dropdown(
+            label=f"{feature}",
+            choices=ds_features,
+            value=ds_features[0],  # assumes ds_features is non-empty; an empty list would fail here
+            interactive=True,
+            visible=True,
+        )
+        for feature in ["text"]  # only the single "text" model input is offered for now
+    ]
+    features += [
+        gr.Dropdown(visible=False) for _ in range(MAX_FEATURES - len(features))  # pad the feature section to MAX_FEATURES entries
+    ]
     return lables + features  # label dropdowns first, then feature dropdowns
+def check_model_and_show_prediction(
+    model_id, dataset_id, dataset_config, dataset_split
+):
     ppl = check_model(model_id)
     if ppl is None or not isinstance(ppl, TextClassificationPipeline):
         gr.Warning("Please check your model.")
         return (
             gr.update(visible=False),
             gr.update(visible=False),
+            *[gr.update(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)],
         )
+    dropdown_placement = [
+        gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)
+    ]
+    if ppl is None:  # pipeline not found
         gr.Warning("Model not found")
         return (
             gr.update(visible=False),
             gr.update(visible=False),
             gr.update(visible=False, open=False),
+            *dropdown_placement,
         )
     model_id2label = ppl.model.config.id2label
+    ds_labels, ds_features = get_labels_and_features_from_dataset(
+        dataset_id, dataset_config, dataset_split
+    )
     # when dataset does not have labels or features
     if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
         # gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
             gr.update(visible=False),
             gr.update(visible=False),
             gr.update(visible=False, open=False),
+            *dropdown_placement,
         )
     column_mappings = list_labels_and_features_from_dataset(
         ds_labels,
         ds_features,
     # when labels or features are not aligned
     # show manually column mapping
+    if (
+        collections.Counter(model_id2label.values()) != collections.Counter(ds_labels)
+        or ds_features[0] != "text"
+    ):
         gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
         return (
             gr.update(visible=False),
             gr.update(visible=False),
             gr.update(visible=True, open=True),
+            *column_mappings,
         )
+    prediction_input, prediction_output = get_example_prediction(
+        ppl, dataset_id, dataset_config, dataset_split
+    )
     return (
         gr.update(value=prediction_input, visible=True),
         gr.update(value=prediction_output, visible=True),
         gr.update(visible=True, open=False),
+        *column_mappings,
     )
 def try_submit(m_id, d_id, config, split, local, uid):
     all_mappings = read_column_mapping(CONFIG_PATH)
         gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
         return (gr.update(interactive=True), gr.update(visible=False))
     label_mapping = all_mappings["labels"]
     if "features" not in all_mappings.keys():
         gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
         return (gr.update(interactive=True), gr.update(visible=False))
         command = [
             "python",
             "cli.py",
+            "--loader",
+            "huggingface",
+            "--model",
+            m_id,
+            "--dataset",
+            d_id,
+            "--dataset_config",
+            config,
+            "--dataset_split",
+            split,
+            "--hf_token",
+            os.environ.get(HF_WRITE_TOKEN),
+            "--discussion_repo",
+            os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID),
+            "--output_format",
+            "markdown",
+            "--output_portal",
+            "huggingface",
+            "--feature_mapping",
+            json.dumps(feature_mapping),
+            "--label_mapping",
+            json.dumps(label_mapping),
+            "--scan_config",
+            "../config.yaml",
         ]
         eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
         logging.info(f"Start local evaluation on {eval_str}")
         save_job_to_pipe(uid, command, threading.Lock())
+        write_log_to_user_file(
+            uid,
+            f"Start local evaluation on {eval_str}. Please wait for your job to start...\n",
+        )
         gr.Info(f"Start local evaluation on {eval_str}")
         return (
             gr.update(interactive=False),
+            gr.update(lines=5, visible=True, interactive=False),
+        )
     else:
         gr.Info("TODO: Submit task to an endpoint")
+    return (gr.update(interactive=True), gr.update(visible=False))  # Submit button

validate_queue.py ADDED Viewed

	@@ -0,0 +1,24 @@

+import random
+import time
+import gradio as gr
+def sleep_a_while():  # click handler: stays busy for a random number of seconds and returns that number as a string
+    seconds = random.randint(5, 10)  # randint includes both endpoints, so 5..10
+    print(f"Working for {seconds} seconds")
+    start = time.time()
+    while start + seconds > time.time():  # busy-wait: spins until the deadline instead of calling time.sleep
+        continue
+    return str(seconds)  # string form, shown in the output Textbox
+with gr.Blocks() as iface:  # minimal UI: one output textbox and one button
+    text = gr.Textbox(label="Slept second")  # receives sleep_a_while's return value
+    run_btn = gr.Button("Run")
+    run_btn.click(sleep_a_while, queue=False, outputs=text, concurrency_limit=1)  # NOTE(review): queue=False is combined with concurrency_limit=1 — confirm the limit still applies when the event bypasses the queue
+if __name__ == "__main__":
+    iface.queue(max_size=2, default_concurrency_limit=2).launch()  # queue size 2, default concurrency limit 2 for events that do not set their own

wordings.py CHANGED Viewed

@@ -1,22 +1,22 @@
-INTRODUCTION_MD = '''
                 <h1 style="text-align: center;">
                 🐢Giskard Evaluator
                 </h1>
                 Welcome to Giskard Evaluator Space! Get your report immediately by simply input your model id and dataset id below. Follow our leads and improve your model in no time.
-                '''
-CONFIRM_MAPPING_DETAILS_MD = '''
                             <h1 style="text-align: center;">
                             Confirm Pre-processing Details
                             </h1>
                             Please confirm the pre-processing details below. Align the column names of your model in the <b>dropdown</b> menu to your dataset's. If you are not sure, please double check your model and dataset.
-                            '''
-CONFIRM_MAPPING_DETAILS_FAIL_MD = '''
                             <h1 style="text-align: center;">
                             Confirm Pre-processing Details
                             </h1>
                             Sorry, we cannot align the input/output of your dataset with the model. <b>Pleaser double check your model and dataset.</b>
-                            '''
-CONFIRM_MAPPING_DETAILS_FAIL_RAW= '''
                             Sorry, we cannot align the input/output of your dataset with the model. Pleaser double check your model and dataset.
-                            '''

+INTRODUCTION_MD = """
                 <h1 style="text-align: center;">
                 🐢Giskard Evaluator
                 </h1>
                 Welcome to Giskard Evaluator Space! Get your report immediately by simply input your model id and dataset id below. Follow our leads and improve your model in no time.
+                """
+CONFIRM_MAPPING_DETAILS_MD = """
                             <h1 style="text-align: center;">
                             Confirm Pre-processing Details
                             </h1>
                             Please confirm the pre-processing details below. Align the column names of your model in the <b>dropdown</b> menu to your dataset's. If you are not sure, please double check your model and dataset.
+                            """
+CONFIRM_MAPPING_DETAILS_FAIL_MD = """
                             <h1 style="text-align: center;">
                             Confirm Pre-processing Details
                             </h1>
                             Sorry, we cannot align the input/output of your dataset with the model. <b>Pleaser double check your model and dataset.</b>
+                            """
+CONFIRM_MAPPING_DETAILS_FAIL_RAW = """
                             Sorry, we cannot align the input/output of your dataset with the model. Pleaser double check your model and dataset.
+                            """