import streamlit as st


import torch
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig


quantization_config = BitsAndBytesConfig(load_in_4bit=True)
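# load_in_4bit=True quantizes the model weights to 4 bits via the bitsandbytes
# backend, which in typical setups requires a CUDA-capable GPU.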


model_name = "masakhane/zephyr-7b-gemma-sft-african-alpaca"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quantization_config)


pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, torch_dtype=torch.bfloat16, device_map="auto")
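# Note: Streamlit re-runs this script from the top on every interaction, so the
# model above is reloaded each time. A minimal sketch of one way to cache the
# loading, assuming Streamlit >= 1.18 (st.cache_resource); load_pipeline is a
# hypothetical helper, not part of the original app:
#
# @st.cache_resource
# def load_pipeline():
#     tok = AutoTokenizer.from_pretrained(model_name)
#     mdl = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quantization_config)
#     return pipeline("text-generation", model=mdl, tokenizer=tok, torch_dtype=torch.bfloat16, device_map="auto")
#
# pipe = load_pipeline()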


# import torch
# from transformers import pipeline

# pipe = pipeline("text-generation", model="masakhane/zephyr-7b-gemma-sft-african-alpaca", torch_dtype=torch.bfloat16, device_map="auto")

# We use the tokenizer's chat template to format each message - see https://huggingface.co/docs/transformers/main/en/chat_templating
# messages = [
#     {
#         "role": "system",
#         "content": "You are a friendly chatbot who answewrs question in given language",
#     },
#     {"role": "user", "content": "what is the 3 biggest countrys in Africa?"},
# ]
# prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
# outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
# print(outputs[0]["generated_text"])


# Keep the running conversation in session state so it survives Streamlit reruns.
if 'messages' not in st.session_state:
    st.session_state.messages = [
        {
            "role": "system",
            "content": "You are a friendly chatbot who answers questions in the given language",
        },
    ]

def ask_model(question):
    # Append the user's question, format the whole history with the chat
    # template, generate a completion, and store the assistant's reply.
    st.session_state.messages.append({"role": "user", "content": question})

    prompt = pipe.tokenizer.apply_chat_template(st.session_state.messages, tokenize=False, add_generation_prompt=True)
    outputs = pipe(prompt, max_new_tokens=1000, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)

    # The generated text echoes the prompt, so keep only the final assistant turn.
    reply = outputs[0]["generated_text"].split("<|assistant|>")[-1]
    print(reply)

    st.session_state.messages.append({"role": "assistant", "content": reply})
    return st.session_state.messages

st.title('LLM Interaction Interface')

user_input = st.text_input("Ask a question:")

if user_input:
    # Send the question to the model, then display the latest assistant reply
    # plus the full conversation history as JSON.
    response = ask_model(user_input)
    st.text_area("Response:", value=response[-1]['content'], height=300)
    st.json({'value': response}, expanded=False)
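
# A minimal sketch (not part of the original app) of how the stored history
# could be rendered as a chat transcript instead of a single text area,
# assuming a Streamlit version that provides st.chat_message (1.24+):
#
# for message in st.session_state.messages:
#     if message["role"] == "system":
#         continue  # hide the system prompt
#     with st.chat_message(message["role"]):
#         st.write(message["content"])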