kimhyunwoo committed · verified · Commit a41650d · 1 Parent(s): b5f6ba9

Update app.py

Files changed (1)
  1. app.py +46 -53
app.py CHANGED
@@ -2,26 +2,20 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, logging
 from huggingface_hub import login
 import torch
 import os
+import gradio as gr
 
-# --- 1. Authentication (Choose ONE method and follow the instructions) ---
+# --- 1. Authentication (Using Environment Variable - the ONLY correct way for Spaces) ---
 
-# Method 1: Environment Variable (RECOMMENDED for security and Hugging Face Spaces)
-# - Set the HUGGING_FACE_HUB_TOKEN environment variable *before* running.
-# - Linux/macOS: `export HUGGING_FACE_HUB_TOKEN=your_token` (in terminal)
-# - Windows (PowerShell): `$env:HUGGING_FACE_HUB_TOKEN = "your_token"`
-# - Hugging Face Spaces: Add `HUGGING_FACE_HUB_TOKEN` as a secret in your Space's settings.
-# - Then, uncomment the following line:
-login()
-
-# Method 2: Direct Token (ONLY for local testing, NOT for deployment)
-# - Replace "YOUR_HUGGING_FACE_TOKEN" with your actual token.
-# - WARNING: Do NOT commit your token to a public repository!
-# login(token="YOUR_HUGGING_FACE_TOKEN")
+# Hugging Face Spaces CANNOT use interactive login. You MUST use an environment variable.
+# 1. Go to your Space's settings.
+# 2. Click on "Repository Secrets".
+# 3. Click "New Secret".
+# 4. Name the secret: HUGGING_FACE_HUB_TOKEN
+# 5. Paste your Hugging Face API token (with read access) as the value.
+# 6. Save the secret.
 
-# Method 3: huggingface-cli (Interactive, one-time setup, good for local development)
-# - Run `huggingface-cli login` in your terminal.
-# - Paste your token when prompted.
-# - No code changes are needed after this; the token is stored.
+# The login() call below will now automatically use the environment variable.
+login()
 
 # --- 2. Model and Tokenizer Setup (with comprehensive error handling) ---
 
@@ -46,9 +40,10 @@ def load_model_and_tokenizer(model_name="google/gemma-3-1b-it"):
         print("1. Ensure you have a Hugging Face account and have accepted the model's terms.")
         print("2. Verify your internet connection.")
         print("3. Double-check the model name: 'google/gemma-3-1b-it'")
-        print("4. Ensure you are properly authenticated (see authentication section above).")
+        print("4. Ensure you are properly authenticated using a Repository Secret (see above).")
         print("5. If using a GPU, ensure your CUDA drivers and PyTorch are correctly installed.")
-        exit(1)  # Exit with an error code
+        # Instead of exiting, raise the exception to be caught by Gradio
+        raise
 
 model, tokenizer = load_model_and_tokenizer()
 
@@ -56,24 +51,13 @@ model, tokenizer = load_model_and_tokenizer()
 # --- 3. Chat Template Function (CRITICAL for conversational models) ---
 
 def apply_chat_template(messages, tokenizer):
-    """Applies the appropriate chat template to the message history.
-
-    Args:
-        messages: A list of dictionaries, where each dictionary has 'role' (user/model)
-                  and 'content' keys.
-        tokenizer: The tokenizer object.
-
-    Returns:
-        A formatted prompt string ready for the model.
-    """
+    """Applies the appropriate chat template."""
     try:
         if hasattr(tokenizer, "chat_template") and tokenizer.chat_template:
-            # Use the tokenizer's built-in chat template if available
             return tokenizer.apply_chat_template(
                 messages, tokenize=False, add_generation_prompt=True
             )
         else:
-            # Fallback to a standard chat template if no specific one is found
             print("WARNING: Tokenizer does not have a defined chat_template. Using a fallback.")
             chat_template = "{% for message in messages %}" \
                 "{{ '<start_of_turn>' + message['role'] + '\n' + message['content'] + '<end_of_turn>\n' }}" \
@@ -83,14 +67,13 @@ def apply_chat_template(messages, tokenizer):
 
     except Exception as e:
         print(f"ERROR: Failed to apply chat template: {e}")
-        exit(1)
+        raise  # Re-raise to be caught by Gradio
 
 
 # --- 4. Text Generation Function ---
 
 def generate_response(messages, model, tokenizer, max_new_tokens=256, temperature=0.7, top_k=50, top_p=0.95, repetition_penalty=1.2):
-    """Generates a response using the model and tokenizer."""
-
+    """Generates a response."""
     prompt = apply_chat_template(messages, tokenizer)
 
     try:
@@ -98,8 +81,8 @@ def generate_response(messages, model, tokenizer, max_new_tokens=256, temperatur
             "text-generation",
             model=model,
             tokenizer=tokenizer,
-            torch_dtype=torch.bfloat16,  # Make sure pipeline also uses correct dtype
-            device_map="auto",  # and device mapping
+            torch_dtype=torch.bfloat16,
+            device_map="auto",
             model_kwargs={"attn_implementation": "flash_attention_2"}
         )
 
@@ -111,33 +94,43 @@
             top_k=top_k,
             top_p=top_p,
            repetition_penalty=repetition_penalty,
-            pad_token_id=tokenizer.eos_token_id,  # Important for proper padding
+            pad_token_id=tokenizer.eos_token_id,
         )
 
-        # Extract *only* the generated text (remove the prompt)
         generated_text = outputs[0]["generated_text"][len(prompt):].strip()
         return generated_text
 
     except Exception as e:
         print(f"ERROR: Failed to generate response: {e}")
-        return "Sorry, I encountered an error while generating a response."
+        raise  # Re-raise the exception
 
 
-# --- 5. Main Interaction Loop (for command-line interaction) ---
-def main():
-    """Main function for interactive command-line chat."""
-
-    messages = []  # Initialize the conversation history
-
-    while True:
-        user_input = input("You: ")
-        if user_input.lower() in ("exit", "quit", "bye"):
-            break
-
-        messages.append({"role": "user", "content": user_input})
-        response = generate_response(messages, model, tokenizer)
-        print(f"Model: {response}")
-        messages.append({"role": "model", "content": response})
-
-if __name__ == "__main__":
-    main()
+# --- 5. Gradio Interface ---
+
+def predict(message, history):
+    if not history:
+        history = []
+    messages = []
+    for user_msg, bot_response in history:
+        messages.append({"role": "user", "content": user_msg})
+        if bot_response:  # Check if bot_response is not None
+            messages.append({"role": "model", "content": bot_response})
+    messages.append({"role": "user", "content": message})
+
+    try:
+        response = generate_response(messages, model, tokenizer)
+        history.append((message, response))
+        return "", history
+    except Exception as e:
+        # Catch any exceptions during generation and display in the UI
+        return f"Error: {e}", history
+
+
+with gr.Blocks() as demo:
+    chatbot = gr.Chatbot(label="Gemma Chatbot", height=500)
+    msg = gr.Textbox(placeholder="Ask me anything!", container=False, scale=7)
+    clear = gr.ClearButton([msg, chatbot])
+
+    msg.submit(predict, [msg, chatbot], [msg, chatbot])
+
+demo.launch()
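
The new authentication flow assumes the HUGGING_FACE_HUB_TOKEN repository secret is present in the Space's environment; the diff's own comment states that the bare login() call will pick it up automatically. As a minimal sketch (not part of this commit; the explicit check is an illustrative assumption), the token can also be read and passed explicitly so that a missing secret fails with a clear message:

import os
from huggingface_hub import login

# Illustrative startup check (not in app.py): fail early with a clear message
# if the HUGGING_FACE_HUB_TOKEN repository secret was never configured.
token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
if not token:
    raise RuntimeError(
        "HUGGING_FACE_HUB_TOKEN is not set. Add it as a Repository Secret "
        "in the Space settings before starting the app."
    )

# Passing the token explicitly avoids relying on login() discovering it.
login(token=token)

Once authenticated this way, the gated google/gemma-3-1b-it download in load_model_and_tokenizer() should proceed with the same credentials.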