import streamlit as st


import torch
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig


quantization_config = BitsAndBytesConfig(load_in_4bit=True)
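# load_in_4bit=True quantizes the model weights to 4 bits via the bitsandbytes
# backend, which in typical setups requires a CUDA-capable GPU.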


model_name = "masakhane/zephyr-7b-gemma-sft-african-alpaca"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quantization_config)


pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, torch_dtype=torch.bfloat16, device_map="auto")
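# Note: Streamlit re-runs this script from the top on every interaction, so the
# model above is reloaded each time. A minimal sketch of one way to cache the
# loading, assuming Streamlit >= 1.18 (st.cache_resource); load_pipeline is a
# hypothetical helper, not part of the original app:
#
# @st.cache_resource
# def load_pipeline():
#     tok = AutoTokenizer.from_pretrained(model_name)
#     mdl = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quantization_config)
#     return pipeline("text-generation", model=mdl, tokenizer=tok, torch_dtype=torch.bfloat16, device_map="auto")
#
# pipe = load_pipeline()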


# import torch
# from transformers import pipeline

# pipe = pipeline("text-generation", model="masakhane/zephyr-7b-gemma-sft-african-alpaca", torch_dtype=torch.bfloat16, device_map="auto")

# We use the tokenizer's chat template to format each message - see https://huggingface.co/docs/transformers/main/en/chat_templating
# messages = [
#     {
#         "role": "system",
#         "content": "You are a friendly chatbot who answewrs question in given language",
#     },
#     {"role": "user", "content": "what is the 3 biggest countrys in Africa?"},
# ]
# prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
# outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
# print(outputs[0]["generated_text"])


# Keep the running conversation in session state so it survives Streamlit reruns.
if 'messages' not in st.session_state:
    st.session_state.messages = [
        {
            "role": "system",
            "content": "You are a friendly chatbot who answers questions in the given language",
        },
    ]

def ask_model(question):
    # Append the user's question, format the whole history with the chat
    # template, generate a completion, and store the assistant's reply.
    st.session_state.messages.append({"role": "user", "content": question})

    prompt = pipe.tokenizer.apply_chat_template(st.session_state.messages, tokenize=False, add_generation_prompt=True)
    outputs = pipe(prompt, max_new_tokens=1000, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)

    # The generated text echoes the prompt, so keep only the final assistant turn.
    reply = outputs[0]["generated_text"].split("<|assistant|>")[-1]
    print(reply)

    st.session_state.messages.append({"role": "assistant", "content": reply})
    return st.session_state.messages

st.title('LLM Interaction Interface')

user_input = st.text_input("Ask a question:")

if user_input:
    # Send the question to the model, then display the latest assistant reply
    # plus the full conversation history as JSON.
    response = ask_model(user_input)
    st.text_area("Response:", value=response[-1]['content'], height=300)
    st.json({'value': response}, expanded=False)
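
# A minimal sketch (not part of the original app) of how the stored history
# could be rendered as a chat transcript instead of a single text area,
# assuming a Streamlit version that provides st.chat_message (1.24+):
#
# for message in st.session_state.messages:
#     if message["role"] == "system":
#         continue  # hide the system prompt
#     with st.chat_message(message["role"]):
#         st.write(message["content"])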