Spaces:

LiKenun
/

ai-building-blocks

Running on Zero

App Files Community

LiKenun committed on Nov 3

Commit

1509884

1 Parent(s): 4c71b8b

AI-generated chat sample revision 1: support both seq2seq and causal LM models

Browse files

Files changed (1) hide show

chatbot.py +89 -28

chatbot.py CHANGED Viewed

@@ -1,26 +1,51 @@
 from os import getenv
-from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 from utils import get_pytorch_device, spaces_gpu
 # Global chatbot instance (initialized once)
 _chatbot = None
 _tokenizer = None
 def get_chatbot():
-    global _chatbot, _tokenizer
     if _chatbot is None:
         model_id = getenv("CHAT_MODEL")
         device = get_pytorch_device()
         _tokenizer = AutoTokenizer.from_pretrained(model_id)
-        _chatbot = AutoModelForSeq2SeqLM.from_pretrained(
-            model_id,
-            use_safetensors=True  # Use safetensors to avoid torch.load restriction
-        ).to(device)
-    return _chatbot, _tokenizer
 @spaces_gpu
 def chat(message: str, conversation_history: list[dict] | None) -> tuple[str, list[dict]]:
-    model, tokenizer = get_chatbot()
     # Initialize conversation history if this is the first message
     if conversation_history is None:
@@ -29,36 +54,72 @@ def chat(message: str, conversation_history: list[dict] | None) -> tuple[str, li
     # Add the user's message
     conversation_history.append({"role": "user", "content": message})
-    # For BlenderBot models, format conversation as dialogue history
-    # Build the full conversation context as a string
-    dialogue_text = ""
-    for msg in conversation_history:
-        if msg["role"] == "user":
-            dialogue_text += f"User: {msg['content']}\n"
-        elif msg["role"] == "assistant":
-            dialogue_text += f"Assistant: {msg['content']}\n"
-    # Tokenize the input
-    inputs = tokenizer([dialogue_text], return_tensors="pt", truncation=True, max_length=512)
     device = get_pytorch_device()
-    inputs = {k: v.to(device) for k, v in inputs.items()}
     # Generate response
     outputs = model.generate(
         **inputs,
-        max_new_tokens=128,
         do_sample=True,
         temperature=0.7,
         pad_token_id=tokenizer.eos_token_id
     )
-    # Decode the generated tokens - for seq2seq models, this should be just the assistant's response
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # Clean up the response - remove any "Assistant:" prefix if present
-    response = response.strip()
-    if response.startswith("Assistant:"):
-        response = response[len("Assistant:"):].strip()
     # Add the assistant's response to history
     conversation_history.append({"role": "assistant", "content": response})

 from os import getenv
+from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer
 from utils import get_pytorch_device, spaces_gpu
 # Global chatbot instance (initialized once)
 _chatbot = None
 _tokenizer = None
+_is_seq2seq = None
 def get_chatbot():
+    """Get or create the chatbot model instance. Supports both causal LM and seq2seq models."""
+    global _chatbot, _tokenizer, _is_seq2seq
     if _chatbot is None:
         model_id = getenv("CHAT_MODEL")
         device = get_pytorch_device()
         _tokenizer = AutoTokenizer.from_pretrained(model_id)
+        # Try to determine model type and load accordingly
+        # Check tokenizer config or model config to see if it's seq2seq
+        try:
+            from transformers import AutoConfig
+            config = AutoConfig.from_pretrained(model_id)
+            # Seq2seq models have encoder/decoder, causal LMs don't
+            _is_seq2seq = hasattr(config, 'is_encoder_decoder') and config.is_encoder_decoder
+        except Exception:
+            # Default to causal LM (most modern chat models)
+            _is_seq2seq = False
+        if _is_seq2seq:
+            _chatbot = AutoModelForSeq2SeqLM.from_pretrained(
+                model_id,
+                use_safetensors=True
+            ).to(device)
+        else:
+            _chatbot = AutoModelForCausalLM.from_pretrained(
+                model_id,
+                use_safetensors=True
+            ).to(device)
+        # Set pad token if not set
+        if _tokenizer.pad_token is None:
+            _tokenizer.pad_token = _tokenizer.eos_token
+    return _chatbot, _tokenizer, _is_seq2seq
 @spaces_gpu
 def chat(message: str, conversation_history: list[dict] | None) -> tuple[str, list[dict]]:
+    model, tokenizer, is_seq2seq = get_chatbot()
     # Initialize conversation history if this is the first message
     if conversation_history is None:
     # Add the user's message
     conversation_history.append({"role": "user", "content": message})
     device = get_pytorch_device()
+    # Check if tokenizer has a chat template (modern chat models)
+    use_chat_template = hasattr(tokenizer, 'chat_template') and tokenizer.chat_template is not None
+    if use_chat_template:
+        # Use chat template for modern chat models (Qwen, Mistral, etc.)
+        try:
+            formatted_input = tokenizer.apply_chat_template(
+                conversation_history,
+                tokenize=False,
+                add_generation_prompt=True
+            )
+            inputs = tokenizer(formatted_input, return_tensors="pt", truncation=True).to(device)
+        except Exception:
+            use_chat_template = False
+    if not use_chat_template:
+        # For models without chat templates (BlenderBot, older models)
+        if is_seq2seq:
+            # Seq2seq format: "User: ...\nAssistant: ..."
+            dialogue_text = ""
+            for msg in conversation_history:
+                if msg["role"] == "user":
+                    dialogue_text += f"User: {msg['content']}\n"
+                elif msg["role"] == "assistant":
+                    dialogue_text += f"Assistant: {msg['content']}\n"
+            inputs = tokenizer([dialogue_text], return_tensors="pt", truncation=True, max_length=512).to(device)
+        else:
+            # Causal LM format: just concatenate messages
+            dialogue_text = ""
+            for msg in conversation_history:
+                if msg["role"] == "user":
+                    dialogue_text += f"User: {msg['content']}\n\n"
+                elif msg["role"] == "assistant":
+                    dialogue_text += f"Assistant: {msg['content']}\n\n"
+            dialogue_text += "Assistant:"
+            inputs = tokenizer(dialogue_text, return_tensors="pt", truncation=True, max_length=1024).to(device)
     # Generate response
     outputs = model.generate(
         **inputs,
+        max_new_tokens=256,
         do_sample=True,
         temperature=0.7,
         pad_token_id=tokenizer.eos_token_id
     )
+    # Decode the response
+    if is_seq2seq:
+        # For seq2seq, output is just the generated response
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        # Clean up any "Assistant:" prefix
+        if response.startswith("Assistant:"):
+            response = response[len("Assistant:"):].strip()
+    else:
+        # For causal LMs, extract only the newly generated part
+        if use_chat_template:
+            # Extract only new tokens (generated part)
+            input_length = inputs.input_ids.shape[1]
+            generated_tokens = outputs[0][input_length:]
+            response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
+        else:
+            # Extract text after the prompt
+            full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+            response = full_text.split("Assistant:")[-1].strip()
     # Add the assistant's response to history
     conversation_history.append({"role": "assistant", "content": response})