# DevashishBhake's picture
# Update app.py
# 0102daf
# raw
# history blame
# 2.1 kB
import gradio as gr
from ctransformers import AutoModelForCausalLM
# Speaker labels used in the prompt transcript and in the stop sequences.
USER_NAME = "User"
BOT_NAME = "Falcon"

# Preamble placed in front of the conversation turn. The speaker labels are
# interpolated so the text stays in sync with USER_NAME / BOT_NAME.
DEFAULT_INSTRUCTIONS = (
    "The following is a conversation between a highly knowledgeable and "
    f"intelligent AI assistant, called {BOT_NAME}, and a human user, called "
    f"{USER_NAME}. In the following interactions, {USER_NAME} and {BOT_NAME} "
    f"will converse in natural language, and {BOT_NAME} will answer "
    f"{USER_NAME}'s questions. {BOT_NAME} was built to be respectful, polite "
    f"and inclusive. {BOT_NAME} was built by the Technology Innovation "
    "Institute in Abu Dhabi. "
    f"{BOT_NAME} will never decline to answer a question, and always attempts "
    f"to give an answer that {USER_NAME} would be satisfied with. It knows a "
    "lot, and always tells the truth. The conversation begins."
)

# Text that marks the start of the next user turn.
STOP_STR = f"\n{USER_NAME}:"
# Individual chunks that could be the beginning of STOP_STR.
STOP_SUSPECT_LIST = [":", "\n", USER_NAME]

# Sampling parameters passed to the model on every generation call.
temperature = 0.8
top_p = 0.9
# The model is loaded once at import time; run_chat calls `llm` per request.
_MODEL_REPO = "TheBloke/falcon-7b-instruct-GGML"
_MODEL_FILE = "falcon-7b-instruct.ggccv1.q4_0.bin"
llm = AutoModelForCausalLM.from_pretrained(
    _MODEL_REPO,
    model_file=_MODEL_FILE,
    model_type="falcon",
    threads=8,
)
def format_chat_prompt(message: str, instructions: str) -> str:
    """Build the single-turn prompt that is sent to the model.

    Args:
        message: The user's message for this turn.
        instructions: Preamble placed before the turn. Leading and trailing
            spaces and newlines are removed.

    Returns:
        The preamble, then a ``USER_NAME: message`` line, then a trailing
        ``BOT_NAME:`` cue for the model to continue from.
    """
    # Strip spaces and newlines in a single pass. Chaining strip(" ") and
    # strip("\n") leaves residue when the two are interleaved: for
    # "\n  text  \n" the spaces around "text" would survive.
    preamble = instructions.strip(" \n")
    return f"{preamble}\n{USER_NAME}: {message}\n{BOT_NAME}:"
def run_chat(message: str) -> str:
    """Generate the assistant's reply to a single user message.

    Args:
        message: The user's message.

    Returns:
        All streamed chunks joined into one string. If the first chunk
        starts with a space, that one space is dropped.
    """
    prompt = format_chat_prompt(message, DEFAULT_INSTRUCTIONS)
    stream = llm(
        prompt,
        max_new_tokens=1024,
        # NOTE(review): the bare USER_NAME entry presumably ends generation
        # as soon as the reply contains that word anywhere - confirm this is
        # intended in addition to STOP_STR.
        stop=[STOP_STR, "<|endoftext|>", USER_NAME],
        temperature=temperature,
        top_p=top_p,
        stream=True,
    )

    chunks = []
    for idx, text_token in enumerate(stream):
        # The prompt ends with "BOT_NAME:", so the first chunk presumably
        # opens with a separating space; drop it so the reply starts clean.
        if idx == 0 and text_token.startswith(" "):
            text_token = text_token[1:]
        chunks.append(text_token)
    return "".join(chunks)
# One text box in, one text box out, wired to run_chat.
# The components are taken from the top-level gradio namespace: the
# gr.inputs / gr.outputs namespaces are deprecated in Gradio 3 and are no
# longer available in Gradio 4.
demo = gr.Interface(
    fn=run_chat,
    inputs=gr.Textbox(label="Message"),
    outputs=gr.Textbox(label="Generated Text"),
)
demo.launch()