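# app.py — Gradio demo that serves mistralai/Mistral-7B-Instruct-v0.2 with a
# PEFT adapter ("TymofiiNas/results"), loaded in 4-bit NF4 via bitsandbytes.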
import gradio as gr
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
# Base model and fine-tuned PEFT adapter, both hosted on the Hugging Face Hub.
base_model_name = "mistralai/Mistral-7B-Instruct-v0.2"
adapter_model_name = "TymofiiNas/results"
# 4-bit NF4 quantization so the 7B model fits in limited GPU memory.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False,
)
# Load the quantized base model entirely on GPU 0, then attach the adapter.
model = AutoModelForCausalLM.from_pretrained(
    base_model_name, quantization_config=bnb_config, device_map={"": 0}
)
model = PeftModel.from_pretrained(model, adapter_model_name)
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
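# NOTE: device_map={"": 0} and the .to("cuda") below assume a CUDA GPU is
# available; bitsandbytes 4-bit loading fails on CPU-only hardware, which is
# a common cause of a Space's "Runtime error" status.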
def generate_response(text):
    # Wrap the user prompt in Mistral's instruction template; BOS is added
    # manually, so add_special_tokens=False avoids a duplicate <s>.
    prompt = "<s>[INST] " + text + " [/INST]"
    encoded_input = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
    model_inputs = encoded_input.to("cuda")
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=400,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Return only the newly generated tokens: slicing the decoded string by
    # len(prompt) is fragile because decoding re-inserts special tokens, so
    # slice at the token level instead.
    new_tokens = generated_ids[0][model_inputs["input_ids"].shape[1] :]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
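# e.g. generate_response("Summarize LoRA in one sentence") returns just the
# model's reply, without the echoed prompt or special tokens.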
demo = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
)
# A single interface needs no TabbedInterface wrapper; launch it directly.
demo.queue().launch()
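# Assumed Space dependencies (requirements.txt), inferred from the imports:
# gradio, torch, transformers, peft, bitsandbytes, and accelerate (required
# by transformers when using device_map).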