slimfrikha-tii committed on
Commit
00cfe59
·
0 Parent(s):

falcon3 release

Browse files
Files changed (4) hide show
  1. .gitattributes +35 -0
  2. README.md +12 -0
  3. app.py +151 -0
  4. requirements.txt +3 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Falcon3 Mamba 7b Instruct Playground
3
+ emoji: 🐍
4
+ colorFrom: yellow
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 5.0.1
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
app.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess
2
+ import sys
3
+ import shlex
4
+ import spaces
5
+ import torch
6
+ import uuid
7
+ import os
8
+ import json
9
+ from pathlib import Path
10
+ import gradio as gr
11
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
12
+ from threading import Thread
13
+
14
+
15
+ # install packages for mamba
16
def install_mamba():
    """Install the prebuilt CUDA wheels needed by the Mamba model at start-up.

    Installs ``causal-conv1d`` 1.4.0 and ``mamba-ssm`` 2.2.2 from their GitHub
    release wheels (CUDA 12.2 / torch 2.3 / CPython 3.10 / linux x86_64, as
    encoded in the wheel file names).

    pip is invoked as ``sys.executable -m pip`` so the wheels are installed
    into the environment of the interpreter running this app, and a non-zero
    exit status is reported on stderr instead of being silently dropped.
    A failed install does not raise; start-up continues and the failure is
    visible in the logs.
    """
    wheel_urls = (
        "https://github.com/Dao-AILab/causal-conv1d/releases/download/v1.4.0/causal_conv1d-1.4.0+cu122torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl",
        "https://github.com/state-spaces/mamba/releases/download/v2.2.2/mamba_ssm-2.2.2+cu122torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl",
    )
    for url in wheel_urls:
        # Argument list (no shell parsing) and the current interpreter's pip.
        result = subprocess.run([sys.executable, "-m", "pip", "install", url])
        if result.returncode != 0:
            print(
                f"WARNING: pip install failed with exit code {result.returncode}: {url}",
                file=sys.stderr,
            )


install_mamba()
21
+
22
# Identifier of the checkpoint handed to the tokenizer/model loaders.
MODEL = "tiiuae/Falcon3-Mamba-7B-Instruct"

# HTML snippets rendered at the top of the page.
TITLE = "<h1><center>Falcon3-Mamba-7B-Instruct playground</center></h1>"
SUB_TITLE = "<center>Playground of Falcon3-Mamba-7B-Instruct</center>"

# Optional system prompt taken from the environment; None when the variable is unset.
SYSTEM_PROMPT = os.environ.get("SYSTEM_PROMPT")
28
+
29
# Custom stylesheet passed to gr.Blocks(css=...).
# Every rule must be closed: an unterminated block (previously `h3`) swallows
# the rules that follow it, so they are not applied as top-level rules.
CSS = """
.duplicate-button {
margin: auto !important;
color: white !important;
background: black !important;
border-radius: 100vh !important;
}
h3 {
text-align: center;
}
/* Fix for chat container */
.chat-container {
height: 600px !important;
overflow-y: auto !important;
flex-direction: column !important;
}
.messages-container {
flex-grow: 1 !important;
overflow-y: auto !important;
padding-right: 10px !important;
}
/* Ensure consistent height */
.contain {
height: 100% !important;
}
"""
54
+
55
# Markdown notice for a finished conversation, asking the user to press "Clear".
END_MESSAGE = (
    "\n\n\n"
    '**The conversation has reached to its end, please press "Clear" to restart a new conversation**'
    "\n"
)
59
+
60
# Target device for inference: "cuda" for GPU usage or "cpu" for CPU usage.
device = "cuda"

# Load the tokenizer and the bfloat16 weights, then move the model to `device`.
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForCausalLM.from_pretrained(MODEL, torch_dtype=torch.bfloat16)
model = model.to(device)

# The model is wrapped with torch.compile on the GPU path only.
if device == "cuda":
    model = torch.compile(model)
70
+
71
@spaces.GPU
def stream_chat(
    message: str,
    history: list,
    temperature: float = 0.3,
    max_new_tokens: int = 100,
    top_p: float = 1.0,
    top_k: int = 20,
    penalty: float = 1.2,
):
    """Stream the model's reply to ``message``, yielding the text produced so far.

    Args:
        message: Latest user message.
        history: Previous turns as ``(user_prompt, assistant_answer)`` pairs.
        temperature: Sampling temperature; ``0`` selects greedy decoding.
        max_new_tokens: Upper bound on the number of generated tokens.
        top_p: Nucleus-sampling threshold (only used when sampling).
        top_k: Top-k sampling cut-off (only used when sampling).
        penalty: Repetition penalty forwarded to ``generate``; values that
            are not strictly positive are ignored.

    Yields:
        The accumulated response text after each streamed chunk, with the
        newline-prefixed "User" / "System" markers stripped out.
    """
    print(f'message: {message}')
    print(f'history: {history}')

    # One system message at the start of the conversation (if configured),
    # rather than one per past turn and none on the first turn.
    conversation = []
    if SYSTEM_PROMPT:
        conversation.append({"role": "system", "content": SYSTEM_PROMPT})
    for prompt, answer in history:
        conversation.append({"role": "user", "content": prompt})
        conversation.append({"role": "assistant", "content": answer})
    conversation.append({"role": "user", "content": message})

    input_text = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
    streamer = TextIteratorStreamer(tokenizer, timeout=40.0, skip_prompt=True, skip_special_tokens=True)

    do_sample = temperature != 0
    generate_kwargs = dict(
        input_ids=inputs,
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        streamer=streamer,
        pad_token_id=11,  # NOTE(review): hard-coded pad id; confirm it matches the tokenizer
    )
    if do_sample:
        # Sampling-only knobs; passing them with greedy decoding is at best ignored.
        generate_kwargs.update(top_p=top_p, top_k=top_k, temperature=temperature)
    if penalty is not None and penalty > 0:
        # Forward the UI's repetition penalty; non-positive values are skipped.
        generate_kwargs["repetition_penalty"] = float(penalty)

    def _generate():
        # Grad mode is thread-local in PyTorch, so no_grad has to be entered
        # inside the worker thread to affect the generate() call.
        with torch.no_grad():
            model.generate(**generate_kwargs)

    thread = Thread(target=_generate)
    thread.start()

    buffer = ""
    for new_text in streamer:
        buffer += new_text
        buffer = buffer.replace("\nUser", "")
        buffer = buffer.replace("\nSystem", "")
        yield buffer

    # The streamer is exhausted once generation has finished; reap the worker.
    thread.join()
    print(f'response: {buffer}')
121
+
122
# Assemble the page: title, subtitle, duplicate button and the chat widget.
with gr.Blocks(css=CSS, theme="soft") as demo:
    gr.HTML(TITLE)
    gr.HTML(SUB_TITLE)
    gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")

    chatbot = gr.Chatbot(height=600, container=True, elem_classes=["chat-container"])
    parameters_accordion = gr.Accordion(label="⚙️ Parameters", open=False, render=False)

    # Listed in the same order as stream_chat's extra parameters:
    # temperature, max_new_tokens, top_p, top_k, penalty.
    generation_controls = [
        gr.Slider(minimum=0, maximum=1, step=0.1, value=0.3, label="Temperature", render=False),
        gr.Slider(minimum=128, maximum=32768, step=1, value=1024, label="Max new tokens", render=False),
        gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=1.0, label="top_p", render=False),
        gr.Slider(minimum=1, maximum=20, step=1, value=20, label="top_k", render=False),
        gr.Slider(minimum=0.0, maximum=2.0, step=0.1, value=1.2, label="Repetition penalty", render=False),
    ]

    example_prompts = [
        ["Hello there, can you suggest few places to visit in UAE?"],
        ["What UAE is known for?"],
    ]

    chat_interface = gr.ChatInterface(
        fn=stream_chat,
        chatbot=chatbot,
        fill_height=True,
        additional_inputs_accordion=parameters_accordion,
        additional_inputs=generation_controls,
        examples=example_prompts,
        cache_examples=False,
    )

# Start the Gradio server only when executed as a script.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ huggingface_hub==0.25.2
2
+ transformers
3
+ torch