flux-quant

Running on A100

App Files Files Community

derekl35 HF Staff committed 16 days ago

Commit

e9b7b43

verified ·

1 Parent(s): 4fa756e

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -241

app.py CHANGED Viewed

@@ -35,24 +35,6 @@ def _save_agg_stats(stats: dict) -> None:
         with open(AGG_FILE, "w") as f:
             json.dump(stats, f, indent=2)
-USER_STATS_FILE = Path(__file__).parent / "user_stats.json"
-USER_STATS_LOCK_FILE = USER_STATS_FILE.with_suffix(".lock")
-def _load_user_stats() -> dict:
-    if USER_STATS_FILE.exists():
-        with open(USER_STATS_FILE, "r") as f:
-            try:
-                return json.load(f)
-            except json.JSONDecodeError:
-                print(f"Warning: {USER_STATS_FILE} is corrupted. Starting with empty user stats.")
-                return {}
-    return {}
-def _save_user_stats(stats: dict) -> None:
-    with InterProcessLock(str(USER_STATS_LOCK_FILE)):
-        with open(USER_STATS_FILE, "w") as f:
-            json.dump(stats, f, indent=2)
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Using device: {DEVICE}")
@@ -62,7 +44,7 @@ DEFAULT_GUIDANCE_SCALE = 3.5
 DEFAULT_NUM_INFERENCE_STEPS = 15
 DEFAULT_MAX_SEQUENCE_LENGTH = 512
 HF_TOKEN = os.environ.get("HF_ACCESS_TOKEN")
-HF_DATASET_REPO_ID = "derekl35/flux-quant-challenge-submissions"
 CACHED_PIPES = {}
 def load_bf16_pipeline():
@@ -99,7 +81,6 @@ def load_bnb_8bit_pipeline():
             torch_dtype=torch.bfloat16
         )
         pipe.to(DEVICE)
-        # pipe.enable_model_cpu_offload()
         end_time = time.time()
         mem_reserved = torch.cuda.memory_reserved(0)/1024**3 if DEVICE == "cuda" else 0
         print(f"8-bit BNB pipeline loaded in {end_time - start_time:.2f}s. Memory reserved: {mem_reserved:.2f} GB")
@@ -121,7 +102,6 @@ def load_bnb_4bit_pipeline():
             torch_dtype=torch.bfloat16
         )
         pipe.to(DEVICE)
-        # pipe.enable_model_cpu_offload()
         end_time = time.time()
         mem_reserved = torch.cuda.memory_reserved(0)/1024**3 if DEVICE == "cuda" else 0
         print(f"4-bit BNB pipeline loaded in {end_time - start_time:.2f}s. Memory reserved: {mem_reserved:.2f} GB")
@@ -134,10 +114,10 @@ def load_bnb_4bit_pipeline():
 @spaces.GPU(duration=240)
 def generate_images(prompt, quantization_choice, progress=gr.Progress(track_tqdm=True)):
     if not prompt:
-        return None, {}, gr.update(value="Please enter a prompt.", interactive=False), gr.update(choices=[], value=None), gr.update(interactive=True), gr.update(interactive=True)
     if not quantization_choice:
-        return None, {}, gr.update(value="Please select a quantization method.", interactive=False), gr.update(choices=[], value=None), gr.update(interactive=True), gr.update(interactive=True)
     if quantization_choice == "8-bit bnb":
         quantized_load_func = load_bnb_8bit_pipeline
@@ -146,7 +126,7 @@ def generate_images(prompt, quantization_choice, progress=gr.Progress(track_tqdm
         quantized_load_func = load_bnb_4bit_pipeline
         quantized_label = "Quantized (4-bit bnb)"
     else:
-        return None, {}, gr.update(value="Invalid quantization choice.", interactive=False), gr.update(choices=[], value=None), gr.update(interactive=True), gr.update(interactive=True)
     model_configs = [
         ("Original", load_bf16_pipeline),
@@ -188,11 +168,11 @@ def generate_images(prompt, quantization_choice, progress=gr.Progress(track_tqdm
         except Exception as e:
             print(f"Error during {label} model processing: {e}")
-            return None, {}, gr.update(value=f"Error processing {label} model: {e}", interactive=False), gr.update(choices=[], value=None), gr.update(interactive=True), gr.update(interactive=True)
     if len(results) != len(model_configs):
-        return None, {}, gr.update(value="Failed to generate images for all model types.", interactive=False), gr.update(choices=[], value=None), gr.update(interactive=True), gr.update(interactive=True)
     shuffled_results = results.copy()
     random.shuffle(shuffled_results)
@@ -263,13 +243,6 @@ def _accuracy_string(correct: int, attempts: int) -> tuple[str, float]:
         return f"{pct:.1f}%", pct
     return "N/A", -1.0
-def _add_medals(user_rows):
-    MEDALS = {0: "🥇 ", 1: "🥈 ", 2: "🥉 "}
-    return [
-        [MEDALS.get(i, "") + row[0], *row[1:]]
-        for i, row in enumerate(user_rows)
-    ]
 def update_leaderboards_data():
     agg = _load_agg_stats()
     quant_rows = []
@@ -282,50 +255,12 @@ def update_leaderboards_data():
             acc_str
         ])
     quant_rows.sort(key=lambda r: r[1]/r[2] if r[2] != 0 else 1e9)
-    user_stats_all = _load_user_stats()
-    overall_user_rows = []
-    for user, per_method_stats_dict in user_stats_all.items():
-        user_total_correct = 0
-        user_total_attempts = 0
-        for method_stats in per_method_stats_dict.values():
-            user_total_correct += method_stats.get("correct", 0)
-            user_total_attempts += method_stats.get("attempts", 0)
-        if user_total_attempts >= 1:
-            acc_str, _ = _accuracy_string(user_total_correct, user_total_attempts)
-            overall_user_rows.append([user, user_total_correct, user_total_attempts, acc_str])
-    overall_user_rows.sort(key=lambda r: (-float(r[3].rstrip('%')) if r[3] != "N/A" else float('-inf'), -r[2]))
-    overall_user_rows_medaled = _add_medals(overall_user_rows)
-    user_leaderboards_per_method = {}
-    quant_method_names = list(agg.keys())
-    for method_name in quant_method_names:
-        method_specific_user_rows = []
-        for user, per_user_method_stats_dict in user_stats_all.items():
-            if method_name in per_user_method_stats_dict:
-                st = per_user_method_stats_dict[method_name]
-                if st.get("attempts", 0) >= 1: # Only include users who have attempted this method
-                    acc_str, _ = _accuracy_string(st["correct"], st["attempts"])
-                    method_specific_user_rows.append([user, st["correct"], st["attempts"], acc_str])
-        method_specific_user_rows.sort(key=lambda r: (-float(r[3].rstrip('%')) if r[3] != "N/A" else float('-inf'), -r[2]))
-        method_specific_user_rows_medaled = _add_medals(method_specific_user_rows)
-        user_leaderboards_per_method[method_name] = method_specific_user_rows_medaled
-    return quant_rows, overall_user_rows_medaled, user_leaderboards_per_method
 quant_df = gr.DataFrame(
     headers=["Method", "Correct Guesses", "Total Attempts", "Detectability %"],
     interactive=False, col_count=(4, "fixed")
 )
-user_df = gr.DataFrame(
-    headers=["User", "Correct Guesses", "Total Attempts", "Accuracy %"],
-    interactive=False, col_count=(4, "fixed")
-)
 with gr.Blocks(title="FLUX Quantization Challenge", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# FLUX Model Quantization Challenge")
@@ -372,26 +307,16 @@ with gr.Blocks(title="FLUX Quantization Challenge", theme=gr.themes.Soft()) as d
             with gr.Row():
                 session_score_box  = gr.Textbox(label="Your accuracy this session", interactive=False)
-            with gr.Row(equal_height=False):
-                username_input = gr.Textbox(
-                    label="Enter Your Name for Leaderboard",
-                    placeholder="YourName",
-                    visible=False,
-                    interactive=True,
-                    scale=2
-                )
-                add_score_button = gr.Button(
-                    "Add My Score to Leaderboard",
-                    visible=False,
-                    variant="secondary",
-                    scale=1
-                )
-            add_score_feedback = gr.Textbox(
-                label="Leaderboard Update",
-                visible=False,
-                interactive=False,
-                lines=1
             )
             correct_mapping_state = gr.State({})
@@ -400,29 +325,26 @@ with gr.Blocks(title="FLUX Quantization Challenge", theme=gr.themes.Soft()) as d
                 "4-bit bnb": {"attempts": 0, "correct": 0}}
             )
             is_example_state = gr.State(False)
-            has_added_score_state = gr.State(False)
             prompt_state = gr.State("")
             seed_state = gr.State(None)
             results_state = gr.State([])
             def _load_example_and_update_dfs(sel_summary):
-                # Find the index of the selected example by its summary
                 idx = next((i for i, ex in enumerate(EXAMPLES) if ex["summary"] == sel_summary), -1)
                 if idx == -1:
-                    # Fallback or error handling if summary not found
                     print(f"Error: Example with summary '{sel_summary}' not found.")
-                    return (gr.update(), gr.update(), gr.update(), False, gr.update(), gr.update(), "", None, [])
                 ex = EXAMPLES[idx]
                 gallery_items, mapping, prompt = load_example(idx)
-                quant_data, overall_user_data, _ = update_leaderboards_data()
-                return gallery_items, mapping, prompt, True, quant_data, overall_user_data, "", None, []
             ex_selector.change(
                 fn=_load_example_and_update_dfs,
                 inputs=ex_selector,
-                outputs=[output_gallery, correct_mapping_state, prompt_input, is_example_state, quant_df, user_df,
-                        prompt_state, seed_state, results_state],
             ).then(
                 lambda: (gr.update(interactive=True), gr.update(interactive=True)),
                 outputs=[image1_btn, image2_btn],
@@ -432,50 +354,39 @@ with gr.Blocks(title="FLUX Quantization Challenge", theme=gr.themes.Soft()) as d
                 fn=generate_images,
                 inputs=[prompt_input, quantization_choice_radio],
                 outputs=[output_gallery, correct_mapping_state, prompt_state, seed_state, results_state,
-                        feedback_box] #, quantization_choice_radio, generate_button, prompt_input]
             ).then(
-                lambda: (False, # for is_example_state
-                         False, # for has_added_score_state
-                         gr.update(visible=False, value="", interactive=True), # username_input reset
-                         gr.update(visible=False), # add_score_button reset
-                         gr.update(visible=False, value="")), # add_score_feedback reset
-                outputs=[is_example_state,
-                         has_added_score_state,
-                         username_input,
-                         add_score_button,
-                         add_score_feedback]
             ).then(
                 lambda: (gr.update(interactive=True),
-                        gr.update(interactive=True),
-                        ""),
                 outputs=[image1_btn, image2_btn, feedback_box],
             )
-            def choose(choice_string, mapping, session_stats, is_example, has_added_score_curr,
-                    prompt, seed, results, username):
                 feedback = check_guess(choice_string, mapping)
                 if not mapping:
-                    return feedback, gr.update(), gr.update(), "", session_stats, [], [], gr.update(), gr.update(), gr.update()
                 quant_label_from_mapping = next((label for label in mapping.values() if "Quantized" in label), None)
                 if not quant_label_from_mapping:
                     print("Error: Could not determine quantization label from mapping:", mapping)
                     return ("Internal Error: Could not process results.", gr.update(interactive=False), gr.update(interactive=False),
-                            "", session_stats, [], [], gr.update(), gr.update(), gr.update())
                 quant_key = "8-bit bnb" if "8-bit bnb" in quant_label_from_mapping else "4-bit bnb"
                 got_it_right = "Correct!" in feedback
                 sess = session_stats.copy()
-                should_log_and_update_stats = not is_example and not has_added_score_curr
-                if should_log_and_update_stats:
                     sess[quant_key]["attempts"] += 1
                     if got_it_right:
                         sess[quant_key]["correct"] += 1
-                    session_stats = sess
                     AGG_STATS = _load_agg_stats()
                     AGG_STATS[quant_key]["attempts"] += 1
@@ -487,6 +398,8 @@ with gr.Blocks(title="FLUX Quantization Challenge", theme=gr.themes.Soft()) as d
                         print("Warning: HF_TOKEN not set. Skipping dataset logging.")
                     elif not results:
                         print("Warning: Results state is empty. Skipping dataset logging.")
                     else:
                         print(f"Logging guess to HF Dataset: {HF_DATASET_REPO_ID}")
                         original_image = None
@@ -525,32 +438,22 @@ with gr.Blocks(title="FLUX Quantization Challenge", theme=gr.themes.Soft()) as d
                                 "quantized_image_displayed_position": [f"Image {quantized_image_pos + 1}"],
                                 "user_guess_displayed_position": [choice_string],
                                 "correct_guess": [got_it_right],
-                                "username": [username.strip() if username else None],
                             }
                             try:
-                                # Attempt to load existing dataset
                                 existing_ds = load_dataset(
                                     HF_DATASET_REPO_ID,
                                     split="train",
                                     token=HF_TOKEN,
                                     features=expected_features,
-                                    # verification_mode="no_checks" # Consider removing or using default
-                                    # download_mode="force_redownload" # For debugging cache issues
                                 )
-                                # Create a new dataset from the new item, casting to the expected features
                                 new_row_ds = Dataset.from_dict(new_data_dict_of_lists, features=expected_features)
-                                # Concatenate
                                 combined_ds = concatenate_datasets([existing_ds, new_row_ds])
-                                # Push the combined dataset
                                 combined_ds.push_to_hub(HF_DATASET_REPO_ID, token=HF_TOKEN, split="train")
                                 print(f"Successfully appended guess to {HF_DATASET_REPO_ID} (train split)")
                             except Exception as e:
                                 print(f"Could not load or append to existing dataset/split. Creating 'train' split with the new item. Error: {e}")
-                                # Create dataset from only the new item, with explicit features
                                 ds_new = Dataset.from_dict(new_data_dict_of_lists, features=expected_features)
-                                # Push this new dataset as the 'train' split
                                 ds_new.push_to_hub(HF_DATASET_REPO_ID, token=HF_TOKEN, split="train")
                                 print(f"Successfully created and logged new 'train' split to {HF_DATASET_REPO_ID}")
                         else:
@@ -564,136 +467,45 @@ with gr.Blocks(title="FLUX Quantization Challenge", theme=gr.themes.Soft()) as d
                 session_msg = ", ".join(
                     f"{k}: {_fmt(v)}" for k, v in sess.items()
                 )
-                current_agg_stats = _load_agg_stats()
-                username_input_update = gr.update(visible=False, interactive=True)
-                add_score_button_update = gr.update(visible=False)
-                current_feedback_text = add_score_feedback.value if hasattr(add_score_feedback, 'value') and add_score_feedback.value else ""
-                add_score_feedback_update = gr.update(visible=has_added_score_curr, value=current_feedback_text)
-                session_total_attempts = sum(stats["attempts"] for stats in sess.values())
-                if not is_example and not has_added_score_curr:
-                    if session_total_attempts >= 1 :
-                        username_input_update = gr.update(visible=True, interactive=True)
-                        add_score_button_update = gr.update(visible=True, interactive=True)
-                        add_score_feedback_update = gr.update(visible=False, value="")
-                    else:
-                        username_input_update = gr.update(visible=False, value=username_input.value if hasattr(username_input, 'value') else "")
-                        add_score_button_update = gr.update(visible=False)
-                        add_score_feedback_update = gr.update(visible=False, value="")
-                elif has_added_score_curr:
-                    username_input_update = gr.update(visible=True, interactive=False, value=username_input.value if hasattr(username_input, 'value') else "")
-                    add_score_button_update = gr.update(visible=True, interactive=False)
-                    add_score_feedback_update = gr.update(visible=True)
-                quant_data, overall_user_data, _ = update_leaderboards_data()
                 return (feedback,
                         gr.update(interactive=False),
                         gr.update(interactive=False),
                         session_msg,
-                        session_stats,
-                        quant_data,
-                        overall_user_data,
-                        username_input_update,
-                        add_score_button_update,
-                        add_score_feedback_update)
             image1_btn.click(
-                fn=lambda mapping, sess, is_ex, has_added, p, s, r, uname: choose("Image 1", mapping, sess, is_ex, has_added, p, s, r, uname),
-                inputs=[correct_mapping_state, session_stats_state, is_example_state, has_added_score_state,
-                        prompt_state, seed_state, results_state, username_input],
                 outputs=[feedback_box, image1_btn, image2_btn,
-                        session_score_box, session_stats_state,
-                        quant_df, user_df,
-                        username_input, add_score_button, add_score_feedback],
             )
             image2_btn.click(
-                fn=lambda mapping, sess, is_ex, has_added, p, s, r, uname: choose("Image 2", mapping, sess, is_ex, has_added, p, s, r, uname),
-                inputs=[correct_mapping_state, session_stats_state, is_example_state, has_added_score_state,
-                        prompt_state, seed_state, results_state, username_input],
                 outputs=[feedback_box, image1_btn, image2_btn,
-                        session_score_box, session_stats_state,
-                        quant_df, user_df,
-                        username_input, add_score_button, add_score_feedback],
             )
-            def handle_add_score_to_leaderboard(username_str, current_session_stats_dict):
-                if not username_str or not username_str.strip():
-                    return ("Username is required.",
-                            gr.update(interactive=True),
-                            gr.update(interactive=True),
-                            False,
-                            None, None)
-                user_stats = _load_user_stats()
-                user_key = username_str.strip()
-                session_total_session_attempts = sum(stats["attempts"] for stats in current_session_stats_dict.values())
-                if session_total_session_attempts == 0:
-                     return ("No attempts made in this session to add to leaderboard.",
-                            gr.update(interactive=True),
-                            gr.update(interactive=True),
-                            False, None, None)
-                if user_key not in user_stats:
-                    user_stats[user_key] = {}
-                for method, stats in current_session_stats_dict.items():
-                    session_method_correct = stats["correct"]
-                    session_method_attempts = stats["attempts"]
-                    if session_method_attempts == 0:
-                        continue
-                    if method not in user_stats[user_key]:
-                        user_stats[user_key][method] = {"correct": 0, "attempts": 0}
-                    user_stats[user_key][method]["correct"] += session_method_correct
-                    user_stats[user_key][method]["attempts"] += session_method_attempts
-                _save_user_stats(user_stats)
-                new_quant_data, new_overall_user_data, _ = update_leaderboards_data()
-                feedback_msg = f"Score for '{user_key}' submitted to leaderboard!"
-                return (feedback_msg,
-                        gr.update(interactive=False),
-                        gr.update(interactive=False),
-                        True,
-                        new_quant_data,
-                        new_overall_user_data)
-            add_score_button.click(
-                fn=handle_add_score_to_leaderboard,
-                inputs=[username_input, session_stats_state],
-                outputs=[add_score_feedback, username_input, add_score_button, has_added_score_state, quant_df, user_df]
-            )
         with gr.TabItem("Leaderboard"):
             gr.Markdown("## Quantization Method Leaderboard  *(Lower % ⇒ harder to detect)*")
             leaderboard_tab_quant_df = gr.DataFrame(
                 headers=["Method", "Correct Guesses", "Total Attempts", "Detectability %"],
                 interactive=False, col_count=(4, "fixed"), label="Quantization Method Leaderboard"
             )
-            gr.Markdown("---")
-            leaderboard_tab_user_df_8bit = gr.DataFrame(
-                headers=["User", "Correct Guesses", "Total Attempts", "Accuracy %"],
-                interactive=False, col_count=(4, "fixed"), label="8-bit bnb User Leaderboard"
-            )
-            leaderboard_tab_user_df_4bit = gr.DataFrame(
-                headers=["User", "Correct Guesses", "Total Attempts", "Accuracy %"],
-                interactive=False, col_count=(4, "fixed"), label="4-bit bnb User Leaderboard"
-            )
             def update_all_leaderboards_for_tab():
-                q_rows, _, per_method_u_dict = update_leaderboards_data()
-                user_rows_8bit = per_method_u_dict.get("8-bit bnb", [])
-                user_rows_4bit = per_method_u_dict.get("4-bit bnb", [])
-                return q_rows, user_rows_8bit, user_rows_4bit
             demo.load(update_all_leaderboards_for_tab, outputs=[
-                leaderboard_tab_quant_df,
-                leaderboard_tab_user_df_8bit,
-                leaderboard_tab_user_df_4bit
             ])
 if __name__ == "__main__":

         with open(AGG_FILE, "w") as f:
             json.dump(stats, f, indent=2)
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Using device: {DEVICE}")
 DEFAULT_NUM_INFERENCE_STEPS = 15
 DEFAULT_MAX_SEQUENCE_LENGTH = 512
 HF_TOKEN = os.environ.get("HF_ACCESS_TOKEN")
+HF_DATASET_REPO_ID = "diffusers/flux-quant-challenge-submissions"
 CACHED_PIPES = {}
 def load_bf16_pipeline():
             torch_dtype=torch.bfloat16
         )
         pipe.to(DEVICE)
         end_time = time.time()
         mem_reserved = torch.cuda.memory_reserved(0)/1024**3 if DEVICE == "cuda" else 0
         print(f"8-bit BNB pipeline loaded in {end_time - start_time:.2f}s. Memory reserved: {mem_reserved:.2f} GB")
             torch_dtype=torch.bfloat16
         )
         pipe.to(DEVICE)
         end_time = time.time()
         mem_reserved = torch.cuda.memory_reserved(0)/1024**3 if DEVICE == "cuda" else 0
         print(f"4-bit BNB pipeline loaded in {end_time - start_time:.2f}s. Memory reserved: {mem_reserved:.2f} GB")
 @spaces.GPU(duration=240)
 def generate_images(prompt, quantization_choice, progress=gr.Progress(track_tqdm=True)):
     if not prompt:
+        return None, {}, gr.update(value="Please enter a prompt.", interactive=False), None, [], gr.update(interactive=True), gr.update(interactive=True)
     if not quantization_choice:
+        return None, {}, gr.update(value="Please select a quantization method.", interactive=False), None, [], gr.update(interactive=True), gr.update(interactive=True)
     if quantization_choice == "8-bit bnb":
         quantized_load_func = load_bnb_8bit_pipeline
         quantized_load_func = load_bnb_4bit_pipeline
         quantized_label = "Quantized (4-bit bnb)"
     else:
+        return None, {}, gr.update(value="Invalid quantization choice.", interactive=False), None, [], gr.update(interactive=True), gr.update(interactive=True)
     model_configs = [
         ("Original", load_bf16_pipeline),
         except Exception as e:
             print(f"Error during {label} model processing: {e}")
+            return None, {}, gr.update(value=f"Error processing {label} model: {e}", interactive=False), None, [], gr.update(interactive=True), gr.update(interactive=True)
     if len(results) != len(model_configs):
+        return None, {}, gr.update(value="Failed to generate images for all model types.", interactive=False), None, [], gr.update(interactive=True), gr.update(interactive=True)
     shuffled_results = results.copy()
     random.shuffle(shuffled_results)
         return f"{pct:.1f}%", pct
     return "N/A", -1.0
 def update_leaderboards_data():
     agg = _load_agg_stats()
     quant_rows = []
             acc_str
         ])
     quant_rows.sort(key=lambda r: r[1]/r[2] if r[2] != 0 else 1e9)
+    return quant_rows
 quant_df = gr.DataFrame(
     headers=["Method", "Correct Guesses", "Total Attempts", "Detectability %"],
     interactive=False, col_count=(4, "fixed")
 )
 with gr.Blocks(title="FLUX Quantization Challenge", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# FLUX Model Quantization Challenge")
             with gr.Row():
                 session_score_box  = gr.Textbox(label="Your accuracy this session", interactive=False)
+            gr.Markdown("""
+            ### Dataset Information
+            Unless you opt out below, your submissions will be recorded in a dataset. This dataset contains anonymized challenge results including prompts, images, quantization methods,
+            and whether guesses were correct.
+            """)
+            opt_out_checkbox = gr.Checkbox(
+                label="Opt out of data collection (don't record my submissions to the dataset)",
+                value=False
             )
             correct_mapping_state = gr.State({})
                 "4-bit bnb": {"attempts": 0, "correct": 0}}
             )
             is_example_state = gr.State(False)
             prompt_state = gr.State("")
             seed_state = gr.State(None)
             results_state = gr.State([])
             def _load_example_and_update_dfs(sel_summary):
                 idx = next((i for i, ex in enumerate(EXAMPLES) if ex["summary"] == sel_summary), -1)
                 if idx == -1:
                     print(f"Error: Example with summary '{sel_summary}' not found.")
+                    return (gr.update(), gr.update(), gr.update(), False, gr.update(), "", None, [])
                 ex = EXAMPLES[idx]
                 gallery_items, mapping, prompt = load_example(idx)
+                quant_data = update_leaderboards_data()
+                return gallery_items, mapping, prompt, True, quant_data, "", None, []
             ex_selector.change(
                 fn=_load_example_and_update_dfs,
                 inputs=ex_selector,
+                outputs=[output_gallery, correct_mapping_state, prompt_input, is_example_state, quant_df,
+                         prompt_state, seed_state, results_state],
             ).then(
                 lambda: (gr.update(interactive=True), gr.update(interactive=True)),
                 outputs=[image1_btn, image2_btn],
                 fn=generate_images,
                 inputs=[prompt_input, quantization_choice_radio],
                 outputs=[output_gallery, correct_mapping_state, prompt_state, seed_state, results_state,
+                         feedback_box]
             ).then(
+                lambda: False, # for is_example_state
+                outputs=[is_example_state]
             ).then(
                 lambda: (gr.update(interactive=True),
+                         gr.update(interactive=True),
+                         ""),
                 outputs=[image1_btn, image2_btn, feedback_box],
             )
+            def choose(choice_string, mapping, session_stats, is_example,
+                       prompt, seed, results, opt_out):
                 feedback = check_guess(choice_string, mapping)
                 if not mapping:
+                    return feedback, gr.update(), gr.update(), "", session_stats, gr.update()
                 quant_label_from_mapping = next((label for label in mapping.values() if "Quantized" in label), None)
                 if not quant_label_from_mapping:
                     print("Error: Could not determine quantization label from mapping:", mapping)
                     return ("Internal Error: Could not process results.", gr.update(interactive=False), gr.update(interactive=False),
+                            "", session_stats, gr.update())
                 quant_key = "8-bit bnb" if "8-bit bnb" in quant_label_from_mapping else "4-bit bnb"
                 got_it_right = "Correct!" in feedback
                 sess = session_stats.copy()
+                if not is_example: # Only log and update stats if it's not an example run
                     sess[quant_key]["attempts"] += 1
                     if got_it_right:
                         sess[quant_key]["correct"] += 1
+                    session_stats = sess # Update the state for the UI
                     AGG_STATS = _load_agg_stats()
                     AGG_STATS[quant_key]["attempts"] += 1
                         print("Warning: HF_TOKEN not set. Skipping dataset logging.")
                     elif not results:
                         print("Warning: Results state is empty. Skipping dataset logging.")
+                    elif opt_out:
+                        print("User opted out of dataset logging. Skipping.")
                     else:
                         print(f"Logging guess to HF Dataset: {HF_DATASET_REPO_ID}")
                         original_image = None
                                 "quantized_image_displayed_position": [f"Image {quantized_image_pos + 1}"],
                                 "user_guess_displayed_position": [choice_string],
                                 "correct_guess": [got_it_right],
+                                "username": [None], # Log None for username
                             }
                             try:
                                 existing_ds = load_dataset(
                                     HF_DATASET_REPO_ID,
                                     split="train",
                                     token=HF_TOKEN,
                                     features=expected_features,
                                 )
                                 new_row_ds = Dataset.from_dict(new_data_dict_of_lists, features=expected_features)
                                 combined_ds = concatenate_datasets([existing_ds, new_row_ds])
                                 combined_ds.push_to_hub(HF_DATASET_REPO_ID, token=HF_TOKEN, split="train")
                                 print(f"Successfully appended guess to {HF_DATASET_REPO_ID} (train split)")
                             except Exception as e:
                                 print(f"Could not load or append to existing dataset/split. Creating 'train' split with the new item. Error: {e}")
                                 ds_new = Dataset.from_dict(new_data_dict_of_lists, features=expected_features)
                                 ds_new.push_to_hub(HF_DATASET_REPO_ID, token=HF_TOKEN, split="train")
                                 print(f"Successfully created and logged new 'train' split to {HF_DATASET_REPO_ID}")
                         else:
                 session_msg = ", ".join(
                     f"{k}: {_fmt(v)}" for k, v in sess.items()
                 )
+                quant_data = update_leaderboards_data()
                 return (feedback,
                         gr.update(interactive=False),
                         gr.update(interactive=False),
                         session_msg,
+                        session_stats, # Return the potentially updated session_stats
+                        quant_data)
             image1_btn.click(
+                fn=lambda mapping, sess, is_ex, p, s, r, opt_out: choose("Image 1", mapping, sess, is_ex, p, s, r, opt_out),
+                inputs=[correct_mapping_state, session_stats_state, is_example_state,
+                        prompt_state, seed_state, results_state, opt_out_checkbox],
                 outputs=[feedback_box, image1_btn, image2_btn,
+                         session_score_box, session_stats_state,
+                         quant_df],
             )
             image2_btn.click(
+                fn=lambda mapping, sess, is_ex, p, s, r, opt_out: choose("Image 2", mapping, sess, is_ex, p, s, r, opt_out),
+                inputs=[correct_mapping_state, session_stats_state, is_example_state,
+                        prompt_state, seed_state, results_state, opt_out_checkbox],
                 outputs=[feedback_box, image1_btn, image2_btn,
+                         session_score_box, session_stats_state,
+                         quant_df],
             )
         with gr.TabItem("Leaderboard"):
             gr.Markdown("## Quantization Method Leaderboard  *(Lower % ⇒ harder to detect)*")
             leaderboard_tab_quant_df = gr.DataFrame(
                 headers=["Method", "Correct Guesses", "Total Attempts", "Detectability %"],
                 interactive=False, col_count=(4, "fixed"), label="Quantization Method Leaderboard"
             )
             def update_all_leaderboards_for_tab():
+                q_rows = update_leaderboards_data()
+                return q_rows # Only return quantization method data
             demo.load(update_all_leaderboards_for_tab, outputs=[
+                leaderboard_tab_quant_df,
             ])
 if __name__ == "__main__":