Spaces:

kayfahaarukku
/

vtuber-tag-gen

Sleeping

App Files Files Community

kayfahaarukku committed on Jun 29

Commit

0b52264

verified ·

1 Parent(s): 638c2b7

Upload 2 files

Browse files

Files changed (2) hide show

gradio_app.py +120 -120
test_lora.py +7 -4

gradio_app.py CHANGED Viewed

@@ -1,121 +1,121 @@
-import gradio as gr
-from test_lora import DanbooruTagTester
-import sys
-import io
-import spaces
-# Gradio's state management will hold the instance of our tester
-# This is better than a global variable as it's session-specific
-@spaces.GPU(duration=300) # Request GPU for model loading, with a 5-min timeout
-def load_model(model_path, base_model, use_4bit, progress=gr.Progress(track_tqdm=True)):
-    """
-    Loads the model and updates the UI.
-    Captures stdout to display loading progress in the UI.
-    """
-    # Redirect stdout to capture print statements from the model loading process
-    old_stdout = sys.stdout
-    sys.stdout = captured_output = io.StringIO()
-    tester = None
-    status_message = ""
-    success = False
-    try:
-        tester = DanbooruTagTester(
-            model_path=model_path,
-            base_model_id=base_model,
-            use_4bit=use_4bit,
-            non_interactive=True  # Ensure no input() calls hang the app
-        )
-        status_message = "Model loaded successfully!"
-        success = True
-    except Exception as e:
-        status_message = f"Error loading model: {e}"
-    finally:
-        # Restore stdout
-        sys.stdout = old_stdout
-    # Get captured output and combine with status message
-    log_output = captured_output.getvalue()
-    final_status = log_output + "\n" + status_message
-    # Return the loaded model instance, the status message, and UI updates
-    return tester, final_status, gr.update(interactive=success), gr.update(interactive=success), gr.update(interactive=success), gr.update(interactive=success), gr.update(interactive=success), gr.update(interactive=success)
-@spaces.GPU # Request GPU for generation
-def generate_tags(tester, prompt, max_new_tokens, temperature, top_k, top_p, do_sample):
-    """
-    Generates tags using the loaded model.
-    """
-    if tester is None:
-        return "Error: Model not loaded. Please load a model first."
-    try:
-        completion = tester.generate_tags(
-            input_prompt=prompt,
-            max_new_tokens=int(max_new_tokens),
-            temperature=temperature,
-            top_k=int(top_k),
-            top_p=top_p,
-            do_sample=do_sample
-        )
-        return completion
-    except Exception as e:
-        return f"Error during generation: {e}"
-# --- Gradio Interface Definition ---
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    tester_state = gr.State(None)
-    gr.Markdown("# Danbooru Tag Autocompletion UI")
-    gr.Markdown("Load a LoRA model and generate Danbooru tag completions.")
-    with gr.Row():
-        with gr.Column(scale=1):
-            gr.Markdown("## 1. Load Model")
-            # Using user's github username "nawka12" as default model path from memory
-            model_path_input = gr.Textbox(label="Model Path (HF Hub or local)", value="kayfahaarukku/chek-8")
-            base_model_input = gr.Textbox(label="Base Model ID", value="google/gemma-3-1b-it")
-            use_4bit_checkbox = gr.Checkbox(label="Use 4-bit Quantization", value=True)
-            load_button = gr.Button("Load Model", variant="primary")
-        with gr.Column(scale=2):
-            gr.Markdown("## 2. Generate Tags")
-            # Generation UI is disabled until model is loaded
-            prompt_input = gr.Textbox(label="Input Prompt", lines=2, placeholder="e.g., 1girl, hatsune miku, vocaloid", interactive=False)
-            generate_button = gr.Button("Generate", variant="primary", interactive=False)
-            with gr.Accordion("Generation Settings", open=False):
-                max_new_tokens_slider = gr.Slider(minimum=10, maximum=500, value=150, step=10, label="Max New Tokens", interactive=False)
-                temperature_slider = gr.Slider(minimum=0.1, maximum=2.0, value=0.8, step=0.1, label="Temperature", interactive=False)
-                top_k_slider = gr.Slider(minimum=1, maximum=100, value=50, step=1, label="Top-K", interactive=False)
-                top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P", interactive=False)
-                do_sample_checkbox = gr.Checkbox(label="Use Sampling", value=True, interactive=False)
-    with gr.Row():
-        with gr.Column():
-            gr.Markdown("### Status & Logs")
-            status_output = gr.Textbox(label="Loading Log", lines=8, interactive=False, max_lines=20)
-        with gr.Column():
-            gr.Markdown("### Generated Tags")
-            completion_output = gr.Textbox(label="Output", lines=8, interactive=False, max_lines=20)
-    # --- Event Handlers ---
-    generation_inputs = [prompt_input, generate_button, max_new_tokens_slider, temperature_slider, top_k_slider, top_p_slider, do_sample_checkbox]
-    load_button.click(
-        fn=load_model,
-        inputs=[model_path_input, base_model_input, use_4bit_checkbox],
-        outputs=[tester_state, status_output] + generation_inputs
-    )
-    generate_button.click(
-        fn=generate_tags,
-        inputs=[tester_state, prompt_input, max_new_tokens_slider, temperature_slider, top_k_slider, top_p_slider, do_sample_checkbox],
-        outputs=completion_output
-    )
-if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0")

+import gradio as gr
+from test_lora import DanbooruTagTester
+import sys
+import io
+import spaces
+# Gradio's state management will hold the instance of our tester
+# This is better than a global variable as it's session-specific
+@spaces.GPU(duration=300) # Request GPU for model loading, with a 5-min timeout
+def load_model(model_path, base_model, use_4bit, progress=gr.Progress(track_tqdm=True)):
+    """
+    Loads the model and updates the UI.
+    Captures stdout to display loading progress in the UI.
+    """
+    # Redirect stdout to capture print statements from the model loading process
+    old_stdout = sys.stdout
+    sys.stdout = captured_output = io.StringIO()
+    tester = None
+    status_message = ""
+    success = False
+    try:
+        tester = DanbooruTagTester(
+            model_path=model_path,
+            base_model_id=base_model,
+            use_4bit=use_4bit,
+            non_interactive=True  # Ensure no input() calls hang the app
+        )
+        status_message = "Model loaded successfully!"
+        success = True
+    except Exception as e:
+        status_message = f"Error loading model: {e}"
+    finally:
+        # Restore stdout
+        sys.stdout = old_stdout
+    # Get captured output and combine with status message
+    log_output = captured_output.getvalue()
+    final_status = log_output + "\n" + status_message
+    # Return the loaded model instance, the status message, and UI updates
+    return tester, final_status, gr.update(interactive=success), gr.update(interactive=success), gr.update(interactive=success), gr.update(interactive=success), gr.update(interactive=success), gr.update(interactive=success), gr.update(interactive=success)
+@spaces.GPU # Request GPU for generation
+def generate_tags(tester, prompt, max_new_tokens, temperature, top_k, top_p, do_sample):
+    """
+    Generates tags using the loaded model.
+    """
+    if tester is None:
+        return "Error: Model not loaded. Please load a model first."
+    try:
+        completion = tester.generate_tags(
+            input_prompt=prompt,
+            max_new_tokens=int(max_new_tokens),
+            temperature=temperature,
+            top_k=int(top_k),
+            top_p=top_p,
+            do_sample=do_sample
+        )
+        return completion
+    except Exception as e:
+        return f"Error during generation: {e}"
+# --- Gradio Interface Definition ---
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    tester_state = gr.State(None)
+    gr.Markdown("# Danbooru Tag Autocompletion UI")
+    gr.Markdown("Load a LoRA model and generate Danbooru tag completions.")
+    with gr.Row():
+        with gr.Column(scale=1):
+            gr.Markdown("## 1. Load Model")
+            # Default model path ("kayfahaarukku/chek-8"); the field accepts an HF Hub ID or a local path
+            model_path_input = gr.Textbox(label="Model Path (HF Hub or local)", value="kayfahaarukku/chek-8")
+            base_model_input = gr.Textbox(label="Base Model ID", value="google/gemma-3-1b-it")
+            use_4bit_checkbox = gr.Checkbox(label="Use 4-bit Quantization", value=True)
+            load_button = gr.Button("Load Model", variant="primary")
+        with gr.Column(scale=2):
+            gr.Markdown("## 2. Generate Tags")
+            # Generation UI is disabled until model is loaded
+            prompt_input = gr.Textbox(label="Input Prompt", lines=2, placeholder="e.g., 1girl, hatsune miku, vocaloid", interactive=False)
+            generate_button = gr.Button("Generate", variant="primary", interactive=False)
+            with gr.Accordion("Generation Settings", open=False):
+                max_new_tokens_slider = gr.Slider(minimum=10, maximum=500, value=150, step=10, label="Max New Tokens", interactive=False)
+                temperature_slider = gr.Slider(minimum=0.1, maximum=2.0, value=0.8, step=0.1, label="Temperature", interactive=False)
+                top_k_slider = gr.Slider(minimum=1, maximum=100, value=50, step=1, label="Top-K", interactive=False)
+                top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P", interactive=False)
+                do_sample_checkbox = gr.Checkbox(label="Use Sampling", value=True, interactive=False)
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown("### Status & Logs")
+            status_output = gr.Textbox(label="Loading Log", lines=8, interactive=False, max_lines=20)
+        with gr.Column():
+            gr.Markdown("### Generated Tags")
+            completion_output = gr.Textbox(label="Output", lines=8, interactive=False, max_lines=20)
+    # --- Event Handlers ---
+    generation_inputs = [prompt_input, generate_button, max_new_tokens_slider, temperature_slider, top_k_slider, top_p_slider, do_sample_checkbox]
+    load_button.click(
+        fn=load_model,
+        inputs=[model_path_input, base_model_input, use_4bit_checkbox],
+        outputs=[tester_state, status_output] + generation_inputs
+    )
+    generate_button.click(
+        fn=generate_tags,
+        inputs=[tester_state, prompt_input, max_new_tokens_slider, temperature_slider, top_k_slider, top_p_slider, do_sample_checkbox],
+        outputs=completion_output
+    )
+if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0")

test_lora.py CHANGED Viewed

@@ -30,6 +30,8 @@ class DanbooruTagTester:
     def _load_model(self):
         """Load the base model, LoRA weights, and tokenizer"""
         # Configure quantization if requested
         if self.use_4bit:
             try:
@@ -53,14 +55,15 @@ class DanbooruTagTester:
             quantization_config=bnb_config,
             device_map="auto",
             torch_dtype=torch.bfloat16 if not self.use_4bit else None,
         )
         # Check if this is actually a LoRA model or just the base model
         try:
             # Try to load LoRA config to check if it's a LoRA model
-            peft_config = PeftConfig.from_pretrained(self.model_path)
             print("Loading LoRA weights...")
-            self.model = PeftModel.from_pretrained(self.base_model, self.model_path)
             print("LoRA model loaded successfully!")
         except Exception as e:
             print(f"Warning: Could not load LoRA weights from {self.model_path}")
@@ -91,10 +94,10 @@ class DanbooruTagTester:
         # Load tokenizer
         print("Loading tokenizer...")
         try:
-            self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
         except Exception as e:
             print(f"Could not load tokenizer from model path, trying base model...")
-            self.tokenizer = AutoTokenizer.from_pretrained(self.base_model_id)
         if self.tokenizer.pad_token is None:
             self.tokenizer.pad_token = self.tokenizer.eos_token

     def _load_model(self):
         """Load the base model, LoRA weights, and tokenizer"""
+        hf_token = os.getenv("HUGGING_FACE_HUB_TOKEN")
         # Configure quantization if requested
         if self.use_4bit:
             try:
             quantization_config=bnb_config,
             device_map="auto",
             torch_dtype=torch.bfloat16 if not self.use_4bit else None,
+            token=hf_token,
         )
         # Check if this is actually a LoRA model or just the base model
         try:
             # Try to load LoRA config to check if it's a LoRA model
+            peft_config = PeftConfig.from_pretrained(self.model_path, token=hf_token)
             print("Loading LoRA weights...")
+            self.model = PeftModel.from_pretrained(self.base_model, self.model_path, token=hf_token)
             print("LoRA model loaded successfully!")
         except Exception as e:
             print(f"Warning: Could not load LoRA weights from {self.model_path}")
         # Load tokenizer
         print("Loading tokenizer...")
         try:
+            self.tokenizer = AutoTokenizer.from_pretrained(self.model_path, token=hf_token)
         except Exception as e:
             print(f"Could not load tokenizer from model path, trying base model...")
+            self.tokenizer = AutoTokenizer.from_pretrained(self.base_model_id, token=hf_token)
         if self.tokenizer.pad_token is None:
             self.tokenizer.pad_token = self.tokenizer.eos_token